From 172f7d0a33465f4d946e8acd1b553bb808293dee Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Sat, 28 Feb 2026 18:44:13 -0500 Subject: [PATCH] deps: update bundled library version from v7.7.1 to v8.0.0 Update the bundled simdutf library from version v7.7.1 to v8.0.0. --- README.md | 2 +- SIMDUTF_VERSION | 2 +- simdutf.cpp | 14627 +++++++++++++++++----------------------------- simdutf.h | 8179 +++++++++++++++++++++++--- 4 files changed, 12830 insertions(+), 9980 deletions(-) diff --git a/README.md b/README.md index d97f2d0..bf6341e 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ simdutf library build this library with the `libsimdutf` build tag. ## simdutf version This library bundles [simdutf](https://github.com/simdutf/simdutf/) version -[v7.7.1](https://github.com/simdutf/simdutf/releases/tag/v7.7.1). +[v8.0.0](https://github.com/simdutf/simdutf/releases/tag/v8.0.0). The [SIMDUTF_VERSION](./SIMDUTF_VERSION) file contains the current version of the bundled simdutf version. diff --git a/SIMDUTF_VERSION b/SIMDUTF_VERSION index bf982a1..5f4f91f 100644 --- a/SIMDUTF_VERSION +++ b/SIMDUTF_VERSION @@ -1 +1 @@ -v7.7.1 +v8.0.0 diff --git a/simdutf.cpp b/simdutf.cpp index 09e3a21..e11ac8b 100644 --- a/simdutf.cpp +++ b/simdutf.cpp @@ -1,903 +1,9 @@ //go:build !libsimdutf -/* auto-generated on 2025-12-20 11:48:09 -0500. Do not edit! */ +/* auto-generated on 2026-01-13 09:03:21 +0100. Do not edit! */ /* begin file src/simdutf.cpp */ #include "simdutf.h" -#if SIMDUTF_FEATURE_BASE64 - // We include base64_tables once. -/* begin file src/tables/base64_tables.h */ -#ifndef SIMDUTF_BASE64_TABLES_H -#define SIMDUTF_BASE64_TABLES_H -#include -#include - -namespace simdutf { -namespace { -namespace tables { -namespace base64 { -namespace base64_default { - -const char e0[256] = { - 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'D', 'D', 'D', - 'D', 'E', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'H', 'H', - 'H', 'H', 'I', 'I', 'I', 'I', 'J', 'J', 'J', 'J', 'K', 'K', 'K', 'K', 'L', - 'L', 'L', 'L', 'M', 'M', 'M', 'M', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O', - 'P', 'P', 'P', 'P', 'Q', 'Q', 'Q', 'Q', 'R', 'R', 'R', 'R', 'S', 'S', 'S', - 'S', 'T', 'T', 'T', 'T', 'U', 'U', 'U', 'U', 'V', 'V', 'V', 'V', 'W', 'W', - 'W', 'W', 'X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y', 'Z', 'Z', 'Z', 'Z', 'a', - 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', - 'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g', 'g', 'g', 'h', 'h', 'h', - 'h', 'i', 'i', 'i', 'i', 'j', 'j', 'j', 'j', 'k', 'k', 'k', 'k', 'l', 'l', - 'l', 'l', 'm', 'm', 'm', 'm', 'n', 'n', 'n', 'n', 'o', 'o', 'o', 'o', 'p', - 'p', 'p', 'p', 'q', 'q', 'q', 'q', 'r', 'r', 'r', 'r', 's', 's', 's', 's', - 't', 't', 't', 't', 'u', 'u', 'u', 'u', 'v', 'v', 'v', 'v', 'w', 'w', 'w', - 'w', 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y', 'z', 'z', 'z', 'z', '0', '0', - '0', '0', '1', '1', '1', '1', '2', '2', '2', '2', '3', '3', '3', '3', '4', - '4', '4', '4', '5', '5', '5', '5', '6', '6', '6', '6', '7', '7', '7', '7', - '8', '8', '8', '8', '9', '9', '9', '9', '+', '+', '+', '+', '/', '/', '/', - '/'}; - -const char e1[256] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', - 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', - 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', - 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', - 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', - '/'}; - -const char e2[256] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', - 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', - 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', - 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', - 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', - '/'}; - -const uint32_t d0[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x000000f8, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, - 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, - 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, - 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, - 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, - 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, - 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, - 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, - 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, - 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, - 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; - -const uint32_t d1[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x0000e003, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, - 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, - 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, - 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, - 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, - 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, - 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, - 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, - 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, - 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, - 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; - -const uint32_t d2[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x00800f00, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, - 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, - 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, - 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, - 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, - 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, - 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, - 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, - 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, - 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, - 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; - -const uint32_t d3[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x003e0000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, - 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, - 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, - 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, - 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, - 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, - 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, - 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, - 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, - 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, - 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -} // namespace base64_default - -namespace base64_url { - -const char e0[256] = { - 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'D', 'D', 'D', - 'D', 'E', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'H', 'H', - 'H', 'H', 'I', 'I', 'I', 'I', 'J', 'J', 'J', 'J', 'K', 'K', 'K', 'K', 'L', - 'L', 'L', 'L', 'M', 'M', 'M', 'M', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O', - 'P', 'P', 'P', 'P', 'Q', 'Q', 'Q', 'Q', 'R', 'R', 'R', 'R', 'S', 'S', 'S', - 'S', 'T', 'T', 'T', 'T', 'U', 'U', 'U', 'U', 'V', 'V', 'V', 'V', 'W', 'W', - 'W', 'W', 'X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y', 'Z', 'Z', 'Z', 'Z', 'a', - 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', - 'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g', 'g', 'g', 'h', 'h', 'h', - 'h', 'i', 'i', 'i', 'i', 'j', 'j', 'j', 'j', 'k', 'k', 'k', 'k', 'l', 'l', - 'l', 'l', 'm', 'm', 'm', 'm', 'n', 'n', 'n', 'n', 'o', 'o', 'o', 'o', 'p', - 'p', 'p', 'p', 'q', 'q', 'q', 'q', 'r', 'r', 'r', 'r', 's', 's', 's', 's', - 't', 't', 't', 't', 'u', 'u', 'u', 'u', 'v', 'v', 'v', 'v', 'w', 'w', 'w', - 'w', 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y', 'z', 'z', 'z', 'z', '0', '0', - '0', '0', '1', '1', '1', '1', '2', '2', '2', '2', '3', '3', '3', '3', '4', - '4', '4', '4', '5', '5', '5', '5', '6', '6', '6', '6', '7', '7', '7', '7', - '8', '8', '8', '8', '9', '9', '9', '9', '-', '-', '-', '-', '_', '_', '_', - '_'}; - -const char e1[256] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', - 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', - 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', - 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', - 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', - '_'}; - -const char e2[256] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', - 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', - 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', - 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', - 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', - '_'}; - -const uint32_t d0[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000f8, 0x01ffffff, 0x01ffffff, - 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, - 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, - 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, - 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, - 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, - 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, - 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, - 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, - 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, - 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, - 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -const uint32_t d1[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000e003, 0x01ffffff, 0x01ffffff, - 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, - 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, - 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, - 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, - 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, - 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, - 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, - 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, - 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, - 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, - 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -const uint32_t d2[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00800f00, 0x01ffffff, 0x01ffffff, - 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, - 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, - 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, - 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, - 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, - 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, - 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, - 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, - 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, - 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, - 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -const uint32_t d3[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003e0000, 0x01ffffff, 0x01ffffff, - 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, - 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, - 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, - 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, - 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, - 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, - 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, - 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, - 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, - 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, - 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -} // namespace base64_url - -namespace base64_default_or_url { -const uint32_t d0[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x000000f8, 0x01ffffff, 0x000000f8, 0x01ffffff, 0x000000fc, - 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, - 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, - 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, - 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, - 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, - 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, - 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, - 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, - 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, - 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, - 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -const uint32_t d1[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x0000e003, 0x01ffffff, 0x0000e003, 0x01ffffff, 0x0000f003, - 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, - 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, - 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, - 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, - 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, - 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, - 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, - 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, - 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, - 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, - 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -const uint32_t d2[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x00800f00, 0x01ffffff, 0x00800f00, 0x01ffffff, 0x00c00f00, - 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, - 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, - 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, - 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, - 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, - 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, - 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, - 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, - 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, - 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, - 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -const uint32_t d3[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x003e0000, 0x01ffffff, 0x003e0000, 0x01ffffff, 0x003f0000, - 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, - 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, - 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, - 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, - 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, - 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, - 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, - 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, - 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, - 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, - 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -} // namespace base64_default_or_url -const uint64_t thintable_epi8[256] = { - 0x0706050403020100, 0x0007060504030201, 0x0007060504030200, - 0x0000070605040302, 0x0007060504030100, 0x0000070605040301, - 0x0000070605040300, 0x0000000706050403, 0x0007060504020100, - 0x0000070605040201, 0x0000070605040200, 0x0000000706050402, - 0x0000070605040100, 0x0000000706050401, 0x0000000706050400, - 0x0000000007060504, 0x0007060503020100, 0x0000070605030201, - 0x0000070605030200, 0x0000000706050302, 0x0000070605030100, - 0x0000000706050301, 0x0000000706050300, 0x0000000007060503, - 0x0000070605020100, 0x0000000706050201, 0x0000000706050200, - 0x0000000007060502, 0x0000000706050100, 0x0000000007060501, - 0x0000000007060500, 0x0000000000070605, 0x0007060403020100, - 0x0000070604030201, 0x0000070604030200, 0x0000000706040302, - 0x0000070604030100, 0x0000000706040301, 0x0000000706040300, - 0x0000000007060403, 0x0000070604020100, 0x0000000706040201, - 0x0000000706040200, 0x0000000007060402, 0x0000000706040100, - 0x0000000007060401, 0x0000000007060400, 0x0000000000070604, - 0x0000070603020100, 0x0000000706030201, 0x0000000706030200, - 0x0000000007060302, 0x0000000706030100, 0x0000000007060301, - 0x0000000007060300, 0x0000000000070603, 0x0000000706020100, - 0x0000000007060201, 0x0000000007060200, 0x0000000000070602, - 0x0000000007060100, 0x0000000000070601, 0x0000000000070600, - 0x0000000000000706, 0x0007050403020100, 0x0000070504030201, - 0x0000070504030200, 0x0000000705040302, 0x0000070504030100, - 0x0000000705040301, 0x0000000705040300, 0x0000000007050403, - 0x0000070504020100, 0x0000000705040201, 0x0000000705040200, - 0x0000000007050402, 0x0000000705040100, 0x0000000007050401, - 0x0000000007050400, 0x0000000000070504, 0x0000070503020100, - 0x0000000705030201, 0x0000000705030200, 0x0000000007050302, - 0x0000000705030100, 0x0000000007050301, 0x0000000007050300, - 0x0000000000070503, 0x0000000705020100, 0x0000000007050201, - 0x0000000007050200, 0x0000000000070502, 0x0000000007050100, - 0x0000000000070501, 0x0000000000070500, 0x0000000000000705, - 0x0000070403020100, 0x0000000704030201, 0x0000000704030200, - 0x0000000007040302, 0x0000000704030100, 0x0000000007040301, - 0x0000000007040300, 0x0000000000070403, 0x0000000704020100, - 0x0000000007040201, 0x0000000007040200, 0x0000000000070402, - 0x0000000007040100, 0x0000000000070401, 0x0000000000070400, - 0x0000000000000704, 0x0000000703020100, 0x0000000007030201, - 0x0000000007030200, 0x0000000000070302, 0x0000000007030100, - 0x0000000000070301, 0x0000000000070300, 0x0000000000000703, - 0x0000000007020100, 0x0000000000070201, 0x0000000000070200, - 0x0000000000000702, 0x0000000000070100, 0x0000000000000701, - 0x0000000000000700, 0x0000000000000007, 0x0006050403020100, - 0x0000060504030201, 0x0000060504030200, 0x0000000605040302, - 0x0000060504030100, 0x0000000605040301, 0x0000000605040300, - 0x0000000006050403, 0x0000060504020100, 0x0000000605040201, - 0x0000000605040200, 0x0000000006050402, 0x0000000605040100, - 0x0000000006050401, 0x0000000006050400, 0x0000000000060504, - 0x0000060503020100, 0x0000000605030201, 0x0000000605030200, - 0x0000000006050302, 0x0000000605030100, 0x0000000006050301, - 0x0000000006050300, 0x0000000000060503, 0x0000000605020100, - 0x0000000006050201, 0x0000000006050200, 0x0000000000060502, - 0x0000000006050100, 0x0000000000060501, 0x0000000000060500, - 0x0000000000000605, 0x0000060403020100, 0x0000000604030201, - 0x0000000604030200, 0x0000000006040302, 0x0000000604030100, - 0x0000000006040301, 0x0000000006040300, 0x0000000000060403, - 0x0000000604020100, 0x0000000006040201, 0x0000000006040200, - 0x0000000000060402, 0x0000000006040100, 0x0000000000060401, - 0x0000000000060400, 0x0000000000000604, 0x0000000603020100, - 0x0000000006030201, 0x0000000006030200, 0x0000000000060302, - 0x0000000006030100, 0x0000000000060301, 0x0000000000060300, - 0x0000000000000603, 0x0000000006020100, 0x0000000000060201, - 0x0000000000060200, 0x0000000000000602, 0x0000000000060100, - 0x0000000000000601, 0x0000000000000600, 0x0000000000000006, - 0x0000050403020100, 0x0000000504030201, 0x0000000504030200, - 0x0000000005040302, 0x0000000504030100, 0x0000000005040301, - 0x0000000005040300, 0x0000000000050403, 0x0000000504020100, - 0x0000000005040201, 0x0000000005040200, 0x0000000000050402, - 0x0000000005040100, 0x0000000000050401, 0x0000000000050400, - 0x0000000000000504, 0x0000000503020100, 0x0000000005030201, - 0x0000000005030200, 0x0000000000050302, 0x0000000005030100, - 0x0000000000050301, 0x0000000000050300, 0x0000000000000503, - 0x0000000005020100, 0x0000000000050201, 0x0000000000050200, - 0x0000000000000502, 0x0000000000050100, 0x0000000000000501, - 0x0000000000000500, 0x0000000000000005, 0x0000000403020100, - 0x0000000004030201, 0x0000000004030200, 0x0000000000040302, - 0x0000000004030100, 0x0000000000040301, 0x0000000000040300, - 0x0000000000000403, 0x0000000004020100, 0x0000000000040201, - 0x0000000000040200, 0x0000000000000402, 0x0000000000040100, - 0x0000000000000401, 0x0000000000000400, 0x0000000000000004, - 0x0000000003020100, 0x0000000000030201, 0x0000000000030200, - 0x0000000000000302, 0x0000000000030100, 0x0000000000000301, - 0x0000000000000300, 0x0000000000000003, 0x0000000000020100, - 0x0000000000000201, 0x0000000000000200, 0x0000000000000002, - 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, - 0x0000000000000000, -}; - -const uint8_t pshufb_combine_table[272] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03, - 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, - 0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, - 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -}; - -const unsigned char BitsSetTable256mul2[256] = { - 0, 2, 2, 4, 2, 4, 4, 6, 2, 4, 4, 6, 4, 6, 6, 8, 2, 4, 4, - 6, 4, 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 2, 4, 4, 6, 4, 6, - 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, - 8, 8, 10, 8, 10, 10, 12, 2, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, - 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, - 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, 8, - 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 2, 4, 4, 6, 4, - 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, - 6, 8, 8, 10, 8, 10, 10, 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, - 10, 8, 10, 10, 12, 6, 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, - 12, 14, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, - 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 6, 8, 8, 10, - 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 8, 10, 10, 12, 10, 12, 12, - 14, 10, 12, 12, 14, 12, 14, 14, 16}; - -constexpr uint8_t to_base64_value[] = { - 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, - 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, - 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, - 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255}; - -constexpr uint8_t to_base64_url_value[] = { - 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 62, 255, 255, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, - 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 255, 255, 255, 255, 63, 255, 26, 27, 28, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, - 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255}; - -constexpr uint8_t to_base64_default_or_url_value[] = { - 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, - 62, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, - 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 255, 255, 255, 255, 63, 255, 26, 27, 28, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, - 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255}; - -static_assert(sizeof(to_base64_value) == 256, - "to_base64_value must have 256 elements"); -static_assert(sizeof(to_base64_url_value) == 256, - "to_base64_url_value must have 256 elements"); -static_assert(to_base64_value[uint8_t(' ')] == 64, - "space must be == 64 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t(' ')] == 64, - "space must be == 64 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('\t')] == 64, - "tab must be == 64 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('\t')] == 64, - "tab must be == 64 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('\r')] == 64, - "cr must be == 64 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('\r')] == 64, - "cr must be == 64 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('\n')] == 64, - "lf must be == 64 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('\n')] == 64, - "lf must be == 64 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('\f')] == 64, - "ff must be == 64 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('\f')] == 64, - "ff must be == 64 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('+')] == 62, - "+ must be == 62 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('-')] == 62, - "- must be == 62 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('/')] == 63, - "/ must be == 62 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('_')] == 63, - "_ must be == 62 in to_base64_url_value"); -} // namespace base64 -} // namespace tables -} // unnamed namespace -} // namespace simdutf - -#endif // SIMDUTF_BASE64_TABLES_H -/* end file src/tables/base64_tables.h */ -#endif // SIMDUTF_FEATURE_BASE64 - /* begin file src/encoding_types.cpp */ namespace simdutf { @@ -2871,87 +1977,88 @@ class implementation final : public simdutf::implementation { #if SIMDUTF_FEATURE_UTF16 void change_endianness_utf16(const char16_t *buf, size_t length, char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t length) const noexcept override; + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; + size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf16_length_from_utf8( + const char *input, size_t length) const noexcept override; simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; ; simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; ; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf8( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t latin1_length_from_utf8( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_latin1( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; simdutf_warn_unused full_result base64_to_binary_details( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; simdutf_warn_unused full_result base64_to_binary_details( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; - size_t binary_to_base64_with_lines(const char *input, size_t length, - char *output, size_t line_length, - base64_options options) const noexcept; + base64_options options) const noexcept override; + size_t + binary_to_base64_with_lines(const char *input, size_t length, char *output, + size_t line_length, + base64_options options) const noexcept override; const char *find(const char *start, const char *end, - char character) const noexcept; + char character) const noexcept override; const char16_t *find(const char16_t *start, const char16_t *end, - char16_t character) const noexcept; -}; + char16_t character) const noexcept override; #endif // SIMDUTF_FEATURE_BASE64 +}; } // namespace arm64 } // namespace simdutf @@ -4562,95 +3669,96 @@ class implementation final : public simdutf::implementation { #if SIMDUTF_FEATURE_UTF16 void change_endianness_utf16(const char16_t *buf, size_t length, char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t length) const noexcept override; + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; + size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf16_length_from_utf8( + const char *input, size_t length) const noexcept override; simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; ; simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; ; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf8( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t latin1_length_from_utf8( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_latin1( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; simdutf_warn_unused full_result base64_to_binary_details( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; simdutf_warn_unused full_result base64_to_binary_details( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; - size_t binary_to_base64_with_lines(const char *input, size_t length, - char *output, size_t line_length, - base64_options options) const noexcept; + base64_options options) const noexcept override; + size_t + binary_to_base64_with_lines(const char *input, size_t length, char *output, + size_t line_length, + base64_options options) const noexcept override; const char *find(const char *start, const char *end, - char character) const noexcept; + char character) const noexcept override; const char16_t *find(const char16_t *start, const char16_t *end, - char16_t character) const noexcept; + char16_t character) const noexcept override; #endif // SIMDUTF_FEATURE_BASE64 }; @@ -5175,10 +4283,10 @@ class implementation final : public simdutf::implementation { #if SIMDUTF_FEATURE_UTF16 void change_endianness_utf16(const char16_t *buf, size_t length, char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t length) const noexcept override; + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t length) const noexcept override; void to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept final; void to_well_formed_utf16le(const char16_t *input, size_t len, @@ -5187,87 +4295,88 @@ class implementation final : public simdutf::implementation { #if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; + size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf16_length_from_utf8( + const char *input, size_t length) const noexcept override; simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; ; simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; ; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf8( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t latin1_length_from_utf8( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_latin1( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; simdutf_warn_unused full_result base64_to_binary_details( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; simdutf_warn_unused full_result base64_to_binary_details( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; - size_t binary_to_base64_with_lines(const char *input, size_t length, - char *output, size_t line_length, - base64_options options) const noexcept; + base64_options options) const noexcept override; + size_t + binary_to_base64_with_lines(const char *input, size_t length, char *output, + size_t line_length, + base64_options options) const noexcept override; const char *find(const char *start, const char *end, - char character) const noexcept; + char character) const noexcept override; const char16_t *find(const char16_t *start, const char16_t *end, - char16_t character) const noexcept; + char16_t character) const noexcept override; #endif // SIMDUTF_FEATURE_BASE64 }; @@ -6464,10 +5573,10 @@ class implementation final : public simdutf::implementation { #if SIMDUTF_FEATURE_UTF16 void change_endianness_utf16(const char16_t *buf, size_t length, char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t length) const noexcept override; + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t length) const noexcept override; void to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept final; void to_well_formed_utf16le(const char16_t *input, size_t len, @@ -6476,87 +5585,88 @@ class implementation final : public simdutf::implementation { #if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; + size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf16_length_from_utf8( + const char *input, size_t length) const noexcept override; simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; ; simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( - const char16_t *input, size_t length) const noexcept; + const char16_t *input, size_t length) const noexcept override; ; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf8( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t latin1_length_from_utf8( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_latin1( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; simdutf_warn_unused full_result base64_to_binary_details( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; simdutf_warn_unused full_result base64_to_binary_details( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; + last_chunk_handling_options::loose) const noexcept override; size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; - size_t binary_to_base64_with_lines(const char *input, size_t length, - char *output, size_t line_length, - base64_options options) const noexcept; + base64_options options) const noexcept override; + size_t + binary_to_base64_with_lines(const char *input, size_t length, char *output, + size_t line_length, + base64_options options) const noexcept override; const char *find(const char *start, const char *end, - char character) const noexcept; + char character) const noexcept override; const char16_t *find(const char16_t *start, const char16_t *end, - char16_t character) const noexcept; + char16_t character) const noexcept override; #endif // SIMDUTF_FEATURE_BASE64 }; @@ -9774,42 +8884,47 @@ SIMDUTF_UNTARGET_REGION #endif // SIMDUTF_RVV_H /* end file src/simdutf/rvv.h */ -/* begin file src/simdutf/lsx.h */ -#ifndef SIMDUTF_LSX_H -#define SIMDUTF_LSX_H +/* begin file src/simdutf/lasx.h */ +#ifndef SIMDUTF_LASX_H +#define SIMDUTF_LASX_H #ifdef SIMDUTF_FALLBACK_H - #error "lsx.h must be included before fallback.h" + #error "lasx.h must be included before fallback.h" #endif -#ifndef SIMDUTF_IMPLEMENTATION_LSX - #define SIMDUTF_IMPLEMENTATION_LSX (SIMDUTF_IS_LSX) +#ifndef SIMDUTF_IMPLEMENTATION_LASX + #define SIMDUTF_IMPLEMENTATION_LASX (SIMDUTF_IS_LSX) #endif -#if SIMDUTF_IMPLEMENTATION_LSX && SIMDUTF_IS_LSX - #define SIMDUTF_CAN_ALWAYS_RUN_LSX 1 +#if SIMDUTF_IMPLEMENTATION_LASX && SIMDUTF_IS_LASX + #define SIMDUTF_CAN_ALWAYS_RUN_LASX 1 #else - #define SIMDUTF_CAN_ALWAYS_RUN_LSX 0 + #define SIMDUTF_CAN_ALWAYS_RUN_LASX 0 #endif #define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) -#if SIMDUTF_IMPLEMENTATION_LSX +#if SIMDUTF_IMPLEMENTATION_LASX + #define SIMDUTF_TARGET_LASX SIMDUTF_TARGET_REGION("lasx,lsx") + + // For runtime dispatching to work, we need the lsxintrin to appear + // before we call SIMDUTF_TARGET_LASX. It is unclear why. + #include namespace simdutf { /** - * Implementation for LoongArch SX. + * Implementation for LoongArch ASX. */ -namespace lsx {} // namespace lsx +namespace lasx {} // namespace lasx } // namespace simdutf -/* begin file src/simdutf/lsx/implementation.h */ -#ifndef SIMDUTF_LSX_IMPLEMENTATION_H -#define SIMDUTF_LSX_IMPLEMENTATION_H +/* begin file src/simdutf/lasx/implementation.h */ +#ifndef SIMDUTF_LASX_IMPLEMENTATION_H +#define SIMDUTF_LASX_IMPLEMENTATION_H namespace simdutf { -namespace lsx { +namespace lasx { namespace { using namespace simdutf; @@ -9818,8 +8933,9 @@ using namespace simdutf; class implementation final : public simdutf::implementation { public: simdutf_really_inline implementation() - : simdutf::implementation("lsx", "LOONGARCH SX", - internal::instruction_set::LSX) {} + : simdutf::implementation("lasx", "LOONGARCH ASX", + internal::instruction_set::LSX | + internal::instruction_set::LASX) {} #if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int detect_encodings(const char *input, size_t length) const noexcept final; @@ -10093,27 +9209,124 @@ class implementation final : public simdutf::implementation { #endif // SIMDUTF_FEATURE_BASE64 }; -} // namespace lsx +} // namespace lasx } // namespace simdutf -#endif // SIMDUTF_LSX_IMPLEMENTATION_H -/* end file src/simdutf/lsx/implementation.h */ +#endif // SIMDUTF_LASX_IMPLEMENTATION_H +/* end file src/simdutf/lasx/implementation.h */ -/* begin file src/simdutf/lsx/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "lsx" -// #define SIMDUTF_IMPLEMENTATION lsx +/* begin file src/simdutf/lasx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lasx" +// #define SIMDUTF_IMPLEMENTATION lasx #define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 -/* end file src/simdutf/lsx/begin.h */ + +#if SIMDUTF_CAN_ALWAYS_RUN_LASX +// nothing needed. +#else +SIMDUTF_TARGET_LASX +#endif +/* end file src/simdutf/lasx/begin.h */ // Declarations -/* begin file src/simdutf/lsx/intrinsics.h */ -#ifndef SIMDUTF_LSX_INTRINSICS_H -#define SIMDUTF_LSX_INTRINSICS_H +/* begin file src/simdutf/lasx/intrinsics.h */ +#ifndef SIMDUTF_LASX_INTRINSICS_H +#define SIMDUTF_LASX_INTRINSICS_H // This should be the correct header whether // you use visual studio or other compilers. #include +#include + +#if defined(__loongarch_asx) + #ifdef __clang__ + #define VREGS_PREFIX "$vr" + #define XREGS_PREFIX "$xr" + #else // GCC + #define VREGS_PREFIX "$f" + #define XREGS_PREFIX "$f" + #endif + #define __ALL_REGS \ + "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26," \ + "27,28,29,30,31" +// Convert __m128i to __m256i +static inline __m256i ____m256i(__m128i in) { + __m256i out = __lasx_xvldi(0); + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " XREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " VREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + : [out] "+f"(out) + : [in] "f"(in)); + return out; +} +// Convert two __m128i to __m256i +static inline __m256i lasx_set_q(__m128i inhi, __m128i inlo) { + __m256i out; + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[hi], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[lo], " VREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x20 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".ifnc %[out], %[hi] \n\t" + ".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " XREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[hi], " VREGS_PREFIX "\\j \n\t" + " xvori.b $xr\\i, $xr\\j, 0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".endif \n\t" + : [out] "=f"(out), [hi] "+f"(inhi) + : [lo] "f"(inlo)); + return out; +} +// Convert __m256i low part to __m128i +static inline __m128i lasx_extracti128_lo(__m256i in) { + __m128i out; + __asm__ volatile(".ifnc %[out], %[in] \n\t" + ".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " XREGS_PREFIX "\\j \n\t" + " vori.b $vr\\i, $vr\\j, 0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".endif \n\t" + : [out] "=f"(out) + : [in] "f"(in)); + return out; +} +// Convert __m256i high part to __m128i +static inline __m128i lasx_extracti128_hi(__m256i in) { + __m128i out; + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " XREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x11 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + : [out] "=f"(out) + : [in] "f"(in)); + return out; +} +#endif /* Encoding of argument for LoongArch64 xvldi instruction. See: @@ -10143,7 +9356,7 @@ all lanes the result as 64-bit elements to all lanes */ -namespace vldi { +namespace lasx_vldi { template class const_u16 { constexpr static const uint8_t b0 = ((v >> 0 * 8) & 0xff); @@ -10260,7 +9473,8 @@ template class const_u64 { constexpr static int value = int((operation << 8) | byte) - 8192; constexpr static bool valid = operation != 0xffff; }; -} // namespace vldi + +} // namespace lasx_vldi // Uncomment when running under QEMU affected // by bug https://gitlab.com/qemu-project/qemu/-/issues/2865 @@ -10270,45 +9484,78 @@ template class const_u64 { #endif #ifdef QEMU_VLDI_BUG - #define lsx_splat_u16(v) __lsx_vreplgr2vr_h(v) - #define lsx_splat_u32(v) __lsx_vreplgr2vr_w(v) + #define lasx_splat_u16(v) __lasx_xvreplgr2vr_h(v) + #define lasx_splat_u32(v) __lasx_xvreplgr2vr_w(v) #else -template constexpr __m128i lsx_splat_u16_aux() { +template constexpr __m256i lasx_splat_u16_aux() { constexpr bool is_imm10 = (int16_t(x) < 512) && (int16_t(x) > -512); constexpr uint16_t imm10 = is_imm10 ? x : 0; - constexpr bool is_vldi = vldi::const_u16::valid; - constexpr int vldi_imm = is_vldi ? vldi::const_u16::value : 0; + constexpr bool is_vldi = lasx_vldi::const_u16::valid; + constexpr int vldi_imm = is_vldi ? lasx_vldi::const_u16::value : 0; - return is_imm10 ? __lsx_vrepli_h(int16_t(imm10)) - : is_vldi ? __lsx_vldi(vldi_imm) - : __lsx_vreplgr2vr_h(x); + return is_imm10 ? __lasx_xvrepli_h(int16_t(imm10)) + : is_vldi ? __lasx_xvldi(vldi_imm) + : __lasx_xvreplgr2vr_h(x); } -template constexpr __m128i lsx_splat_u32_aux() { +template constexpr __m256i lasx_splat_u32_aux() { constexpr bool is_imm10 = (int32_t(x) < 512) && (int32_t(x) > -512); constexpr uint32_t imm10 = is_imm10 ? x : 0; - constexpr bool is_vldi = vldi::const_u32::valid; - constexpr int vldi_imm = is_vldi ? vldi::const_u32::value : 0; + constexpr bool is_vldi = lasx_vldi::const_u32::valid; + constexpr int vldi_imm = is_vldi ? lasx_vldi::const_u32::value : 0; - return is_imm10 ? __lsx_vrepli_w(int32_t(imm10)) - : is_vldi ? __lsx_vldi(vldi_imm) - : __lsx_vreplgr2vr_w(x); + return is_imm10 ? __lasx_xvrepli_w(int32_t(imm10)) + : is_vldi ? __lasx_xvldi(vldi_imm) + : __lasx_xvreplgr2vr_w(x); } - #define lsx_splat_u16(v) lsx_splat_u16_aux<(v)>() - #define lsx_splat_u32(v) lsx_splat_u32_aux<(v)>() + #define lasx_splat_u16(v) lasx_splat_u16_aux<(v)>() + #define lasx_splat_u32(v) lasx_splat_u32_aux<(v)>() #endif // QEMU_VLDI_BUG -#endif // SIMDUTF_LSX_INTRINSICS_H -/* end file src/simdutf/lsx/intrinsics.h */ -/* begin file src/simdutf/lsx/bitmanipulation.h */ -#ifndef SIMDUTF_LSX_BITMANIPULATION_H -#define SIMDUTF_LSX_BITMANIPULATION_H +#ifndef lsx_splat_u16 + #ifdef QEMU_VLDI_BUG + #define lsx_splat_u16(v) __lsx_vreplgr2vr_h(v) + #define lsx_splat_u32(v) __lsx_vreplgr2vr_w(v) + #else +namespace { +template constexpr __m128i lsx_splat_u16_aux() { + return ((int16_t(x) < 512) && (int16_t(x) > -512)) + ? __lsx_vrepli_h( + ((int16_t(x) < 512) && (int16_t(x) > -512)) ? int16_t(x) : 0) + : (lasx_vldi::const_u16::valid + ? __lsx_vldi(lasx_vldi::const_u16::valid + ? lasx_vldi::const_u16::value + : 0) + : __lsx_vreplgr2vr_h(x)); +} + +template constexpr __m128i lsx_splat_u32_aux() { + return ((int32_t(x) < 512) && (int32_t(x) > -512)) + ? __lsx_vrepli_w( + ((int32_t(x) < 512) && (int32_t(x) > -512)) ? int32_t(x) : 0) + : (lasx_vldi::const_u32::valid + ? __lsx_vldi(lasx_vldi::const_u32::valid + ? lasx_vldi::const_u32::value + : 0) + : __lsx_vreplgr2vr_w(x)); +} +} // namespace + #define lsx_splat_u16(v) lsx_splat_u16_aux<(v)>() + #define lsx_splat_u32(v) lsx_splat_u32_aux<(v)>() + #endif // QEMU_VLDI_BUG +#endif // lsx_splat_u16 + +#endif // SIMDUTF_LASX_INTRINSICS_H +/* end file src/simdutf/lasx/intrinsics.h */ +/* begin file src/simdutf/lasx/bitmanipulation.h */ +#ifndef SIMDUTF_LASX_BITMANIPULATION_H +#define SIMDUTF_LASX_BITMANIPULATION_H #include namespace simdutf { -namespace lsx { +namespace lasx { namespace { simdutf_really_inline int count_ones(uint64_t input_num) { @@ -10322,171 +9569,349 @@ simdutf_really_inline int trailing_zeroes(uint64_t input_num) { #endif } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf -#endif // SIMDUTF_LSX_BITMANIPULATION_H -/* end file src/simdutf/lsx/bitmanipulation.h */ -/* begin file src/simdutf/lsx/simd.h */ -#ifndef SIMDUTF_LSX_SIMD_H -#define SIMDUTF_LSX_SIMD_H +#endif // SIMDUTF_LASX_BITMANIPULATION_H +/* end file src/simdutf/lasx/bitmanipulation.h */ +/* begin file src/simdutf/lasx/simd.h */ +#ifndef SIMDUTF_LASX_SIMD_H +#define SIMDUTF_LASX_SIMD_H namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace simd { -template struct simd8; +__attribute__((aligned(32))) static const uint8_t prev_shuf_table[32][32] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, + {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, + 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0}, + {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0}, + {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0}, + {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0}, + {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0}, + {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, +}; -// -// Base class of simd8 and simd8, both of which use __m128i -// internally. -// -template > struct base_u8 { - __m128i value; - static const int SIZE = sizeof(value); +__attribute__((aligned(32))) static const uint8_t bitsel_mask_table[32][32] = { + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0}}; - // Conversion from/to SIMD register - simdutf_really_inline base_u8(const __m128i _value) : value(_value) {} - simdutf_really_inline operator const __m128i &() const { return this->value; } - simdutf_really_inline operator __m128i &() { return this->value; } +// Forward-declared so they can be used by splat and friends. +template struct base { + __m256i value; + + // Zero constructor + simdutf_really_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdutf_really_inline base(const __m256i _value) : value(_value) {} + // Conversion to SIMD register + simdutf_really_inline operator const __m256i &() const { return this->value; } + simdutf_really_inline operator __m256i &() { return this->value; } + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + if (big_endian) { + __m256i zero = __lasx_xvldi(0); + __m256i in8 = __lasx_xvpermi_d(this->value, 0b11011000); + __m256i inlow = __lasx_xvilvl_b(in8, zero); + __m256i inhigh = __lasx_xvilvh_b(in8, zero); + __lasx_xvst(inlow, reinterpret_cast(ptr), 0); + __lasx_xvst(inhigh, reinterpret_cast(ptr), 32); + } else { + __m256i inlow = __lasx_vext2xv_hu_bu(this->value); + __m256i inhigh = __lasx_vext2xv_hu_bu( + __lasx_xvpermi_q(this->value, this->value, 0b00000001)); + __lasx_xvst(inlow, reinterpret_cast<__m256i *>(ptr), 0); + __lasx_xvst(inhigh, reinterpret_cast<__m256i *>(ptr), 32); + } + } + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + __m256i in32_0 = __lasx_vext2xv_wu_bu(this->value); + __lasx_xvst(in32_0, reinterpret_cast(ptr), 0); + + __m256i in8_1 = __lasx_xvpermi_d(this->value, 0b00000001); + __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); + __lasx_xvst(in32_1, reinterpret_cast(ptr), 32); + + __m256i in8_2 = __lasx_xvpermi_d(this->value, 0b00000010); + __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); + __lasx_xvst(in32_2, reinterpret_cast(ptr), 64); + __m256i in8_3 = __lasx_xvpermi_d(this->value, 0b00000011); + __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); + __lasx_xvst(in32_3, reinterpret_cast(ptr), 96); + } // Bit operations - simdutf_really_inline simd8 operator|(const simd8 other) const { - return __lsx_vor_v(this->value, other); + simdutf_really_inline Child operator|(const Child other) const { + return __lasx_xvor_v(this->value, other); } - simdutf_really_inline simd8 operator&(const simd8 other) const { - return __lsx_vand_v(this->value, other); + simdutf_really_inline Child operator&(const Child other) const { + return __lasx_xvand_v(this->value, other); } - simdutf_really_inline simd8 operator^(const simd8 other) const { - return __lsx_vxor_v(this->value, other); + simdutf_really_inline Child operator^(const Child other) const { + return __lasx_xvxor_v(this->value, other); } - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - simdutf_really_inline simd8 &operator|=(const simd8 other) { - auto this_cast = static_cast *>(this); + simdutf_really_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } +}; + +template struct simd8; +template > +struct base8 : base> { + simdutf_really_inline base8() : base>() {} + simdutf_really_inline base8(const __m256i _value) : base>(_value) {} friend simdutf_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { - return __lsx_vseq_b(lhs, rhs); + return __lasx_xvseq_b(lhs, rhs); } - template + static const int SIZE = sizeof(base::value); + + template simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(this->value, N), - __lsx_vbsrl_v(prev_chunk.value, 16 - N)); + static_assert(N <= 16, "unsupported shift value"); + + if (!N) + return this->value; + + __m256i zero = __lasx_xvldi(0); + __m256i result, shuf; + if (N < 16) { + shuf = __lasx_xvld(prev_shuf_table[N], 0); + + result = __lasx_xvshuf_b( + __lasx_xvpermi_q(this->value, this->value, 0b00000001), this->value, + shuf); + __m256i srl_prev = __lasx_xvbsrl_v( + __lasx_xvpermi_q(zero, prev_chunk.value, 0b00110001), (16 - N)); + __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); + result = __lasx_xvbitsel_v(result, srl_prev, mask); + + return result; + } else if (N == 16) { + return __lasx_xvpermi_q(this->value, prev_chunk.value, 0b00100001); + } } }; // SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base_u8 { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - +template <> struct simd8 : base8 { static simdutf_really_inline simd8 splat(bool _value) { - return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } - simdutf_really_inline simd8(const __m128i _value) : base_u8(_value) {} - // False constructor - simdutf_really_inline simd8() : simd8(__lsx_vldi(0)) {} + simdutf_really_inline simd8() : base8() {} + simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} // Splat constructor - simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} - simdutf_really_inline void store(uint8_t dst[16]) const { - return __lsx_vst(this->value, dst, 0); - } + simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} simdutf_really_inline uint32_t to_bitmask() const { - return __lsx_vpickve2gr_wu(__lsx_vmsknz_b(*this), 0); + __m256i mask = __lasx_xvmsknz_b(this->value); + uint32_t mask0 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t mask1 = __lasx_xvpickve2gr_wu(mask, 4); + return (mask0 | (mask1 << 16)); + } + simdutf_really_inline bool any() const { + if (__lasx_xbz_b(this->value)) + return false; + return true; } + simdutf_really_inline simd8 operator~() const { return *this ^ true; } }; -// Unsigned bytes -template <> struct simd8 : base_u8 { - static simdutf_really_inline simd8 splat(uint8_t _value) { - return __lsx_vreplgr2vr_b(_value); +template struct base8_numeric : base8 { + static simdutf_really_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); } - static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } - static simdutf_really_inline simd8 load(const uint8_t *values) { - return __lsx_vld(values, 0); + static simdutf_really_inline simd8 zero() { return __lasx_xvldi(0); } + static simdutf_really_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); } - simdutf_really_inline simd8(const __m128i _value) - : base_u8(_value) {} - // Zero constructor - simdutf_really_inline simd8() : simd8(zero()) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8((__m128i)v16u8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Store to array - simdutf_really_inline void store(uint8_t dst[16]) const { - return __lsx_vst(this->value, dst, 0); + static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15); } - // Order-specific operations - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return __lsx_vsle_bu(other, *this); - } - simdutf_really_inline simd8 - operator>(const simd8 other) const { - return __lsx_vslt_bu(other, *this); - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - value = __lsx_vsub_b(value, other.value); - return *this; - } - // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true - // = nonzero. For ARM, returns all 1's. - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return simd8(*this > other); - } + simdutf_really_inline base8_numeric() : base8() {} + simdutf_really_inline base8_numeric(const __m256i _value) + : base8(_value) {} - // Bit-specific operations - simdutf_really_inline simd8 any_bits_set(simd8 bits) const { - return __lsx_vslt_bu(__lsx_vldi(0), __lsx_vand_v(this->value, bits)); - } - simdutf_really_inline bool is_ascii() const { - return __lsx_vpickve2gr_hu(__lsx_vmskgez_b(this->value), 0) == 0xFFFF; + // Store to array + simdutf_really_inline void store(T dst[32]) const { + return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); } - simdutf_really_inline bool any_bits_set_anywhere() const { - return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(this->value), 0) > 0; - } - template simdutf_really_inline simd8 shr() const { - return __lsx_vsrli_b(this->value, N); - } - template simdutf_really_inline simd8 shl() const { - return __lsx_vslli_b(this->value, N); - } + // Override to distinguish from bool version + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior // for out of range values) template simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); + __m256i origin = __lasx_xvand_v(this->value, __lasx_xvldi(0x1f)); + return __lasx_xvshuf_b(__lasx_xvldi(0), lookup_table, origin); } template @@ -10500,150 +9925,115 @@ template <> struct simd8 : base_u8 { replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15)); } - - template - simdutf_really_inline simd8 - apply_lookup_16_to(const simd8 original) const { - __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); - return __lsx_vshuf_b(__lsx_vldi(0), *this, simd8(original_tmp)); - } - - simdutf_really_inline uint64_t sum_bytes() const { - const auto sum_u16 = __lsx_vhaddw_hu_bu(value, value); - const auto sum_u32 = __lsx_vhaddw_wu_hu(sum_u16, sum_u16); - const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); - - return uint64_t(__lsx_vpickve2gr_du(sum_u64, 0)) + - uint64_t(__lsx_vpickve2gr_du(sum_u64, 1)); - } }; // Signed bytes -template <> struct simd8 { - __m128i value; - - static const int SIZE = sizeof(value); - - static simdutf_really_inline simd8 splat(int8_t _value) { - return __lsx_vreplgr2vr_b(_value); - } - static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } - static simdutf_really_inline simd8 load(const int8_t values[16]) { - return __lsx_vld(values, 0); - } +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { - __m128i zero = __lsx_vldi(0); - if simdutf_constexpr (match_system(big_endian)) { - __lsx_vst(__lsx_vilvl_b(zero, (__m128i)this->value), - reinterpret_cast(p), 0); - __lsx_vst(__lsx_vilvh_b(zero, (__m128i)this->value), - reinterpret_cast(p + 8), 0); - } else { - __lsx_vst(__lsx_vilvl_b((__m128i)this->value, zero), - reinterpret_cast(p), 0); - __lsx_vst(__lsx_vilvh_b((__m128i)this->value, zero), - reinterpret_cast(p + 8), 0); - } + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + simdutf_really_inline operator simd8() const; + simdutf_really_inline bool is_ascii() const { + __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); + if (__lasx_xbnz_v(ascii_mask)) + return false; + return true; } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { - __m128i zero = __lsx_vldi(0); - __m128i in16low = __lsx_vilvl_b(zero, (__m128i)this->value); - __m128i in16high = __lsx_vilvh_b(zero, (__m128i)this->value); - __m128i in32_0 = __lsx_vilvl_h(zero, in16low); - __m128i in32_1 = __lsx_vilvh_h(zero, in16low); - __m128i in32_2 = __lsx_vilvl_h(zero, in16high); - __m128i in32_3 = __lsx_vilvh_h(zero, in16high); - __lsx_vst(in32_0, reinterpret_cast(p), 0); - __lsx_vst(in32_1, reinterpret_cast(p + 4), 0); - __lsx_vst(in32_2, reinterpret_cast(p + 8), 0); - __lsx_vst(in32_3, reinterpret_cast(p + 12), 0); + // Order-sensitive comparisons + simdutf_really_inline simd8 operator>(const simd8 other) const { + return __lasx_xvslt_b(other, this->value); } - - // In places where the table can be reused, which is most uses in simdutf, it - // is worth it to do 4 table lookups, as there is no direct zero extension - // from u8 to u32. - simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { - const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, - 2, 255, 255, 255, 3, 255, 255, 255}; - const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, - 6, 255, 255, 255, 7, 255, 255, 255}; - const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, - 10, 255, 255, 255, 11, 255, 255, 255}; - const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, - 14, 255, 255, 255, 15, 255, 255, 255}; - - // encourage store pairing and interleaving - const auto shuf1 = this->apply_lookup_16_to(tb1); - const auto shuf2 = this->apply_lookup_16_to(tb2); - shuf1.store(reinterpret_cast(p)); - shuf2.store(reinterpret_cast(p + 4)); - - const auto shuf3 = this->apply_lookup_16_to(tb3); - const auto shuf4 = this->apply_lookup_16_to(tb4); - shuf3.store(reinterpret_cast(p + 8)); - shuf4.store(reinterpret_cast(p + 12)); + simdutf_really_inline simd8 operator<(const simd8 other) const { + return __lasx_xvslt_b(this->value, other); } - // Conversion from/to SIMD register - simdutf_really_inline simd8(const __m128i _value) : value(_value) {} +}; - // Zero constructor - simdutf_really_inline simd8() : simd8(zero()) {} +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, + uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, + uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, + uint8_t v31) + : simd8((__m256i)v32u8{v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31}) {} - // Store to array - simdutf_really_inline void store(int8_t dst[16]) const { - return __lsx_vst(value, dst, 0); + // Saturated math + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return __lasx_xvssub_bu(this->value, other); } - simdutf_really_inline operator simd8() const { - return ((__m128i)this->value); + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); } - - simdutf_really_inline simd8 - operator|(const simd8 other) const { - return __lsx_vor_v((__m128i)value, (__m128i)other.value); + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return __lasx_xvsle_bu(other, *this); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + value = __lasx_xvsub_b(value, other.value); + return *this; } + // Bit-specific operations simdutf_really_inline bool is_ascii() const { - return (__lsx_vpickve2gr_hu(__lsx_vmskgez_b((__m128i)this->value), 0) == - 0xffff); + __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); + if (__lasx_xbnz_v(ascii_mask)) + return false; + return true; } - - // Order-sensitive comparisons - simdutf_really_inline simd8 operator>(const simd8 other) const { - return __lsx_vslt_b((__m128i)other.value, (__m128i)value); + simdutf_really_inline bool any_bits_set_anywhere() const { + if (__lasx_xbnz_v(this->value)) + return true; + return false; } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return __lsx_vslt_b((__m128i)value, (__m128i)other.value); + template simdutf_really_inline simd8 shr() const { + return __lasx_xvsrli_b(this->value, N); } - - template - simdutf_really_inline simd8 - prev(const simd8 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(this->value, N), - __lsx_vbsrl_v(prev_chunk.value, 16 - N)); + template simdutf_really_inline simd8 shl() const { + return __lasx_xvslli_b(this->value, N); } - template - simdutf_really_inline simd8 - apply_lookup_16_to(const simd8 original) const { - __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); - return __lsx_vshuf_b(__lsx_vldi(0), (__m128i)this->value, - simd8(original_tmp)); + simdutf_really_inline uint64_t sum_bytes() const { + const auto sum_u16 = __lasx_xvhaddw_hu_bu(value, value); + const auto sum_u32 = __lasx_xvhaddw_wu_hu(sum_u16, sum_u16); + const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); + + return uint64_t(__lasx_xvpickve2gr_du(sum_u64, 0)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 1)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 2)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 3)); } }; +simdutf_really_inline simd8::operator simd8() const { + return this->value; +} template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert( - NUM_CHUNKS == 4, - "LoongArch kernel should use four registers per 64-byte block."); + static_assert(NUM_CHUNKS == 2, + "LASX kernel should use two registers per 64-byte block."); simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64 &o) = delete; // no copy allowed @@ -10651,36 +10041,36 @@ template struct simd8x64 { operator=(const simd8 other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) + : chunks{chunk0, chunk1} {} simdutf_really_inline simd8x64(const T *ptr) : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} + simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} simdutf_really_inline void store(T *ptr) const { this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); } simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { this->chunks[0] |= other.chunks[0]; this->chunks[1] |= other.chunks[1]; - this->chunks[2] |= other.chunks[2]; - this->chunks[3] |= other.chunks[3]; return *this; } simdutf_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); + return this->chunks[0] | this->chunks[1]; } - simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } template simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { @@ -10688,152 +10078,117 @@ template struct simd8x64 { sizeof(simd8) * 0); this->chunks[1].template store_ascii_as_utf16(ptr + sizeof(simd8) * 1); - this->chunks[2].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 2); - this->chunks[3].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 3); } simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); - this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); - this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { - __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[3]), 6); - mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[2]), 4)); - mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[1]), 2)); - mask = __lsx_vor_v(mask, __lsx_vmsknz_b(this->chunks[0])); - return __lsx_vpickve2gr_du(mask, 0); + this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); } simdutf_really_inline uint64_t lt(const T m) const { const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) .to_bitmask(); } + simdutf_really_inline uint64_t gt(const T m) const { const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, - this->chunks[2] > mask, this->chunks[3] > mask) + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) .to_bitmask(); } simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { const simd8 mask = simd8::splat(m); - return simd8x64(simd8(this->chunks[0].value) >= mask, - simd8(this->chunks[1].value) >= mask, - simd8(this->chunks[2].value) >= mask, - simd8(this->chunks[3].value) >= mask) + return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), + (simd8(__m256i(this->chunks[1])) >= mask)) .to_bitmask(); } }; // struct simd8x64 -/* begin file src/simdutf/lsx/simd16-inl.h */ +/* begin file src/simdutf/lasx/simd16-inl.h */ template struct simd16; -template > struct base_u16 { - __m128i value; - static const size_t SIZE = sizeof(value); - static const size_t ELEMENTS = sizeof(value) / sizeof(T); - - // Conversion from/to SIMD register - simdutf_really_inline base_u16() = default; - simdutf_really_inline base_u16(const __m128i _value) : value(_value) {} - // Bit operations - simdutf_really_inline simd16 operator|(const simd16 other) const { - return __lsx_vor_v(this->value, other.value); - } - simdutf_really_inline simd16 operator&(const simd16 other) const { - return __lsx_vand_v(this->value, other.value); - } - simdutf_really_inline simd16 operator~() const { - return __lsx_vxori_b(this->value, 0xFF); - } - - friend simdutf_really_inline Mask operator==(const simd16 lhs, - const simd16 rhs) { - return __lsx_vseq_h(lhs.value, rhs.value); - } - - template - simdutf_really_inline simd16 byte_right_shift() const { - return __lsx_vbsrl_v(this->value, N); - } - - simdutf_really_inline uint16_t first() const { - return uint16_t(__lsx_vpickve2gr_w(value, 0)); - } -}; - template > -struct base16 : base_u16 { - using bitmask_type = uint16_t; +struct base16 : base> { + using bitmask_type = uint32_t; - simdutf_really_inline base16() : base_u16() {} - simdutf_really_inline base16(const __m128i _value) : base_u16(_value) {} + simdutf_really_inline base16() : base>() {} + simdutf_really_inline base16(const __m256i _value) + : base>(_value) {} template simdutf_really_inline base16(const Pointer *ptr) - : base16(__lsx_vld(ptr, 0)) {} + : base16(__lasx_xvld(reinterpret_cast(ptr), 0)) {} - static const int SIZE = sizeof(base_u16::value); + /// the size of vector in bytes + static const int SIZE = sizeof(base>::value); - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), - __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); - } + /// the number of elements of type T a vector can hold + static const int ELEMENTS = SIZE / sizeof(T); }; // SIMD byte mask type (returned by things like eq and gt) template <> struct simd16 : base16 { static simdutf_really_inline simd16 splat(bool _value) { - return __lsx_vreplgr2vr_h(uint16_t(-(!!_value))); + return __lasx_xvreplgr2vr_h(uint16_t(-(!!_value))); } simdutf_really_inline simd16() : base16() {} - simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} + simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} simdutf_really_inline bitmask_type to_bitmask() const { - __m128i mask = __lsx_vmsknz_b(this->value); - bitmask_type mask0 = bitmask_type(__lsx_vpickve2gr_wu(mask, 0)); - return mask0; + __m256i mask = __lasx_xvmsknz_b(this->value); + bitmask_type mask0 = __lasx_xvpickve2gr_wu(mask, 0); + bitmask_type mask1 = __lasx_xvpickve2gr_wu(mask, 4); + return (mask0 | (mask1 << 16)); } + simdutf_really_inline simd16 operator~() const { return *this ^ true; } - simdutf_really_inline bool is_zero() const { return __lsx_bz_v(this->value); } + simdutf_really_inline bool is_zero() const { + return __lasx_xbz_v(this->value); + } + + template simdutf_really_inline simd16 byte_right_shift() const { + const auto t0 = __lasx_xvbsrl_v(this->value, N); + const auto t1 = __lasx_xvpermi_q(this->value, __lasx_xvldi(0), 0b00000011); + const auto t2 = __lasx_xvbsll_v(t1, 16 - N); + const auto t3 = __lasx_xvor_v(t0, t2); + return t3; + } + + simdutf_really_inline uint16_t first() const { + return uint16_t(__lasx_xvpickve2gr_w(value, 0)); + } }; template struct base16_numeric : base16 { static simdutf_really_inline simd16 splat(T _value) { - return __lsx_vreplgr2vr_h(_value); + return __lasx_xvreplgr2vr_h((uint16_t)_value); } - static simdutf_really_inline simd16 zero() { return __lsx_vldi(0); } - + static simdutf_really_inline simd16 zero() { return __lasx_xvldi(0); } template static simdutf_really_inline simd16 load(const Pointer values) { - return __lsx_vld(values, 0); + return __lasx_xvld(values, 0); } - simdutf_really_inline base16_numeric(const __m128i _value) + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const __m256i _value) : base16(_value) {} // Store to array simdutf_really_inline void store(T dst[8]) const { - return __lsx_vst(this->value, dst, 0); + return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); } // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { - return __lsx_vxori_b(this->value, 0xFF); - } + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } }; -// Unsigned code unitstemplate<> +// Unsigned code units template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16(const __m128i _value) - : base16_numeric((__m128i)_value) {} + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m256i _value) + : base16_numeric(_value) {} // Splat constructor simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} @@ -10843,67 +10198,71 @@ template <> struct simd16 : base16_numeric { simdutf_really_inline simd16(const char16_t *values) : simd16(load(reinterpret_cast(values))) {} - // Copy constructor - simdutf_really_inline simd16(const simd16 mask) : simd16(mask.value) {} - // Order-specific operations simdutf_really_inline simd16 &operator+=(const simd16 other) { - value = __lsx_vadd_h(value, other.value); + value = __lasx_xvadd_h(value, other.value); return *this; } + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return __lasx_xvshuf4i_b(this->value, 0b10110001); + } + template static simdutf_really_inline simd8 pack_shifted_right(const simd16 &v0, const simd16 &v1) { - return __lsx_vssrlni_bu_h(v1.value, v0.value, N); + return __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(v1.value, v0.value, N), + 0b11011000); } // Pack with the unsigned saturation of two uint16_t code units into single // uint8_t vector static simdutf_really_inline simd8 pack(const simd16 &v0, const simd16 &v1) { - return pack_shifted_right<0>(v0, v1); - } - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - return __lsx_vshuf4i_b(this->value, 0b10110001); + return pack_shifted_right<0>(v0, v1); } simdutf_really_inline uint64_t sum() const { - const auto sum_u32 = __lsx_vhaddw_wu_hu(value, value); - const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); + const auto sum_u32 = __lasx_xvhaddw_wu_hu(value, value); + const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); - return uint64_t(__lsx_vpickve2gr_du(sum_u64, 0)) + - uint64_t(__lsx_vpickve2gr_du(sum_u64, 1)); + return uint64_t(__lasx_xvpickve2gr_du(sum_u64, 0)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 1)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 2)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 3)); + } + + template simdutf_really_inline simd16 byte_right_shift() const { + return __lasx_xvbsrl_v(this->value, N); } }; simdutf_really_inline simd16 operator<(const simd16 a, const simd16 b) { - return __lsx_vslt_hu(a.value, b.value); + return __lasx_xvslt_hu(a.value, b.value); } simdutf_really_inline simd16 operator>(const simd16 a, const simd16 b) { - return __lsx_vslt_hu(b.value, a.value); + return __lasx_xvslt_hu(b.value, a.value); } simdutf_really_inline simd16 operator<=(const simd16 a, const simd16 b) { - return __lsx_vsle_hu(a.value, b.value); + return __lasx_xvsle_hu(a.value, b.value); } simdutf_really_inline simd16 operator>=(const simd16 a, const simd16 b) { - return __lsx_vsle_hu(b.value, a.value); + return __lasx_xvsle_hu(b.value, a.value); } template struct simd16x32 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert( - NUM_CHUNKS == 4, - "LOONGARCH kernel should use four registers per 64-byte block."); + static_assert(NUM_CHUNKS == 2, + "LASX kernel should use two registers per 64-byte block."); simd16 chunks[NUM_CHUNKS]; simd16x32(const simd16x32 &o) = delete; // no copy allowed @@ -10911,127 +10270,139 @@ template struct simd16x32 { operator=(const simd16 other) = delete; // no assignment allowed simd16x32() = delete; // no default constructor allowed - simdutf_really_inline - simd16x32(const simd16 chunk0, const simd16 chunk1, - const simd16 chunk2, const simd16 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd16x32(const simd16 chunk0, + const simd16 chunk1) + : chunks{chunk0, chunk1} {} simdutf_really_inline simd16x32(const T *ptr) : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} + simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} simdutf_really_inline void store(T *ptr) const { this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); } simdutf_really_inline void swap_bytes() { this->chunks[0] = this->chunks[0].swap_bytes(); this->chunks[1] = this->chunks[1].swap_bytes(); - this->chunks[2] = this->chunks[2].swap_bytes(); - this->chunks[3] = this->chunks[3].swap_bytes(); } simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); } simdutf_really_inline uint64_t lteq(const T m) const { const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask) .to_bitmask(); } }; // struct simd16x32 -simdutf_really_inline simd16 operator^(const simd16 a, - uint16_t b) { - const auto bv = __lsx_vreplgr2vr_h(b); - return __lsx_vxor_v(a.value, bv); -} - -simdutf_really_inline simd16 operator^(const simd16 a, - const simd16 b) { - return __lsx_vxor_v(a.value, b.value); -} - simdutf_really_inline simd16 min(const simd16 a, const simd16 b) { - return __lsx_vmin_hu(a.value, b.value); + return __lasx_xvmin_hu(a.value, b.value); +} + +simdutf_really_inline simd16 operator==(const simd16 a, + uint16_t b) { + const auto bv = __lasx_xvreplgr2vr_h(b); + return __lasx_xvseq_h(a.value, bv); } simdutf_really_inline simd16 as_vector_u16(const simd16 x) { return x.value; } -/* end file src/simdutf/lsx/simd16-inl.h */ -/* begin file src/simdutf/lsx/simd32-inl.h */ + +simdutf_really_inline simd16 operator&(const simd16 a, + uint16_t b) { + const auto bv = __lasx_xvreplgr2vr_h(b); + return __lasx_xvand_v(a.value, bv); +} + +simdutf_really_inline simd16 operator&(const simd16 a, + const simd16 b) { + return __lasx_xvand_v(a.value, b.value); +} + +simdutf_really_inline simd16 operator^(const simd16 a, + uint16_t b) { + const auto bv = __lasx_xvreplgr2vr_h(b); + return __lasx_xvxor_v(a.value, bv); +} + +simdutf_really_inline simd16 operator^(const simd16 a, + const simd16 b) { + return __lasx_xvxor_v(a.value, b.value); +} +/* end file src/simdutf/lasx/simd16-inl.h */ +/* begin file src/simdutf/lasx/simd32-inl.h */ template struct simd32; template <> struct simd32 { - __m128i value; + __m256i value; static const int SIZE = sizeof(value); static const int ELEMENTS = SIZE / sizeof(uint32_t); // constructors - simdutf_really_inline simd32(__m128i v) : value(v) {} + simdutf_really_inline simd32(__m256i v) : value(v) {} template - simdutf_really_inline simd32(Ptr *ptr) : value(__lsx_vld(ptr, 0)) {} + simdutf_really_inline simd32(Ptr *ptr) : value(__lasx_xvld(ptr, 0)) {} // in-place operators simdutf_really_inline simd32 &operator-=(const simd32 other) { - value = __lsx_vsub_w(value, other.value); + value = __lasx_xvsub_w(value, other.value); return *this; } // members simdutf_really_inline uint64_t sum() const { - return uint64_t(__lsx_vpickve2gr_wu(value, 0)) + - uint64_t(__lsx_vpickve2gr_wu(value, 1)) + - uint64_t(__lsx_vpickve2gr_wu(value, 2)) + - uint64_t(__lsx_vpickve2gr_wu(value, 3)); + const auto odd = __lasx_xvsrli_d(value, 32); + const auto even = __lasx_xvand_v(value, __lasx_xvreplgr2vr_d(0xffffffff)); + + const auto sum64 = __lasx_xvadd_d(odd, even); + + return uint64_t(__lasx_xvpickve2gr_du(sum64, 0)) + + uint64_t(__lasx_xvpickve2gr_du(sum64, 1)) + + uint64_t(__lasx_xvpickve2gr_du(sum64, 2)) + + uint64_t(__lasx_xvpickve2gr_du(sum64, 3)); } // static members static simdutf_really_inline simd32 splat(uint32_t x) { - return __lsx_vreplgr2vr_w(x); + return __lasx_xvreplgr2vr_w(x); } static simdutf_really_inline simd32 zero() { - return __lsx_vrepli_w(0); + return __lasx_xvrepli_w(0); } }; // ------------------------------------------------------------ template <> struct simd32 { - __m128i value; + __m256i value; static const int SIZE = sizeof(value); // constructors - simdutf_really_inline simd32(__m128i v) : value(v) {} + simdutf_really_inline simd32(__m256i v) : value(v) {} }; // ------------------------------------------------------------ simdutf_really_inline simd32 operator&(const simd32 a, const simd32 b) { - return __lsx_vor_v(a.value, b.value); + return __lasx_xvor_v(a.value, b.value); } simdutf_really_inline simd32 operator<(const simd32 a, const simd32 b) { - return __lsx_vslt_wu(a.value, b.value); + return __lasx_xvslt_wu(a.value, b.value); } simdutf_really_inline simd32 operator>(const simd32 a, const simd32 b) { - return __lsx_vslt_wu(b.value, a.value); + return __lasx_xvslt_wu(b.value, a.value); } // ------------------------------------------------------------ @@ -11039,112 +10410,128 @@ simdutf_really_inline simd32 operator>(const simd32 a, simdutf_really_inline simd32 as_vector_u32(const simd32 v) { return v.value; } -/* end file src/simdutf/lsx/simd32-inl.h */ -/* begin file src/simdutf/lsx/simd64-inl.h */ +/* end file src/simdutf/lasx/simd32-inl.h */ +/* begin file src/simdutf/lasx/simd64-inl.h */ template struct simd64; template <> struct simd64 { - __m128i value; + __m256i value; static const int SIZE = sizeof(value); static const int ELEMENTS = SIZE / sizeof(uint64_t); // constructors - simdutf_really_inline simd64(__m128i v) : value(v) {} + simdutf_really_inline simd64(__m256i v) : value(v) {} template - simdutf_really_inline simd64(Ptr *ptr) : value(__lsx_vld(ptr, 0)) {} + simdutf_really_inline simd64(Ptr *ptr) : value(__lasx_xvld(ptr, 0)) {} // in-place operators simdutf_really_inline simd64 &operator+=(const simd64 other) { - value = __lsx_vadd_d(value, other.value); + value = __lasx_xvadd_d(value, other.value); return *this; } // members simdutf_really_inline uint64_t sum() const { - return uint64_t(__lsx_vpickve2gr_du(value, 0)) + - uint64_t(__lsx_vpickve2gr_du(value, 1)); + return uint64_t(__lasx_xvpickve2gr_du(value, 0)) + + uint64_t(__lasx_xvpickve2gr_du(value, 1)) + + uint64_t(__lasx_xvpickve2gr_du(value, 2)) + + uint64_t(__lasx_xvpickve2gr_du(value, 3)); } // static members static simdutf_really_inline simd64 zero() { - return __lsx_vrepli_d(0); + return __lasx_xvrepli_d(0); } }; // ------------------------------------------------------------ template <> struct simd64 { - __m128i value; + __m256i value; static const int SIZE = sizeof(value); // constructors - simdutf_really_inline simd64(__m128i v) : value(v) {} + simdutf_really_inline simd64(__m256i v) : value(v) {} }; // ------------------------------------------------------------ simd64 sum_8bytes(const simd8 v) { - const auto sum_u16 = __lsx_vhaddw_hu_bu(v, v); - const auto sum_u32 = __lsx_vhaddw_wu_hu(sum_u16, sum_u16); - const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); + const auto sum_u16 = __lasx_xvhaddw_hu_bu(v, v); + const auto sum_u32 = __lasx_xvhaddw_wu_hu(sum_u16, sum_u16); + const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); return simd64(sum_u64); } -/* end file src/simdutf/lsx/simd64-inl.h */ +/* end file src/simdutf/lasx/simd64-inl.h */ } // namespace simd } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf -#endif // SIMDUTF_LSX_SIMD_H -/* end file src/simdutf/lsx/simd.h */ +#endif // SIMDUTF_LASX_SIMD_H +/* end file src/simdutf/lasx/simd.h */ -/* begin file src/simdutf/lsx/end.h */ +/* begin file src/simdutf/lasx/end.h */ #undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP -/* end file src/simdutf/lsx/end.h */ -#endif // SIMDUTF_IMPLEMENTATION_LSX +#if SIMDUTF_CAN_ALWAYS_RUN_LASX +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif +/* end file src/simdutf/lasx/end.h */ -#endif // SIMDUTF_LSX_H -/* end file src/simdutf/lsx.h */ -/* begin file src/simdutf/lasx.h */ -#ifndef SIMDUTF_LASX_H -#define SIMDUTF_LASX_H +#endif // SIMDUTF_IMPLEMENTATION_LASX + +#endif // SIMDUTF_LASX_H +/* end file src/simdutf/lasx.h */ +/* begin file src/simdutf/lsx.h */ +#ifndef SIMDUTF_LSX_H +#define SIMDUTF_LSX_H #ifdef SIMDUTF_FALLBACK_H - #error "lasx.h must be included before fallback.h" + #error "lsx.h must be included before fallback.h" +#endif + +#ifndef SIMDUTF_CAN_ALWAYS_RUN_LASX + #error "lsx.h must be included after lasx.h" #endif -#ifndef SIMDUTF_IMPLEMENTATION_LASX - #define SIMDUTF_IMPLEMENTATION_LASX (SIMDUTF_IS_LASX) +#ifndef SIMDUTF_IMPLEMENTATION_LSX + #if SIMDUTF_CAN_ALWAYS_RUN_LASX + #define SIMDUTF_IMPLEMENTATION_LSX 0 + #else + #define SIMDUTF_IMPLEMENTATION_LSX (SIMDUTF_IS_LSX) + #endif #endif -#if SIMDUTF_IMPLEMENTATION_LASX && SIMDUTF_IS_LASX - #define SIMDUTF_CAN_ALWAYS_RUN_LASX 1 +#if SIMDUTF_IMPLEMENTATION_LSX && SIMDUTF_IS_LSX + #define SIMDUTF_CAN_ALWAYS_RUN_LSX 1 #else - #define SIMDUTF_CAN_ALWAYS_RUN_LASX 0 + #define SIMDUTF_CAN_ALWAYS_RUN_LSX 0 #endif #define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) -#if SIMDUTF_IMPLEMENTATION_LASX +#if SIMDUTF_IMPLEMENTATION_LSX namespace simdutf { /** - * Implementation for LoongArch ASX. + * Implementation for LoongArch SX. */ -namespace lasx {} // namespace lasx +namespace lsx {} // namespace lsx } // namespace simdutf -/* begin file src/simdutf/lasx/implementation.h */ -#ifndef SIMDUTF_LASX_IMPLEMENTATION_H -#define SIMDUTF_LASX_IMPLEMENTATION_H +/* begin file src/simdutf/lsx/implementation.h */ +#ifndef SIMDUTF_LSX_IMPLEMENTATION_H +#define SIMDUTF_LSX_IMPLEMENTATION_H namespace simdutf { -namespace lasx { +namespace lsx { namespace { using namespace simdutf; @@ -11153,9 +10540,8 @@ using namespace simdutf; class implementation final : public simdutf::implementation { public: simdutf_really_inline implementation() - : simdutf::implementation("lasx", "LOONGARCH ASX", - internal::instruction_set::LSX | - internal::instruction_set::LASX) {} + : simdutf::implementation("lsx", "LOONGARCH SX", + internal::instruction_set::LSX) {} #if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int detect_encodings(const char *input, size_t length) const noexcept final; @@ -11429,118 +10815,27 @@ class implementation final : public simdutf::implementation { #endif // SIMDUTF_FEATURE_BASE64 }; -} // namespace lasx +} // namespace lsx } // namespace simdutf -#endif // SIMDUTF_LASX_IMPLEMENTATION_H -/* end file src/simdutf/lasx/implementation.h */ +#endif // SIMDUTF_LSX_IMPLEMENTATION_H +/* end file src/simdutf/lsx/implementation.h */ -/* begin file src/simdutf/lasx/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "lasx" -// #define SIMDUTF_IMPLEMENTATION lasx +/* begin file src/simdutf/lsx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lsx" +// #define SIMDUTF_IMPLEMENTATION lsx #define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 -/* end file src/simdutf/lasx/begin.h */ +/* end file src/simdutf/lsx/begin.h */ // Declarations -/* begin file src/simdutf/lasx/intrinsics.h */ -#ifndef SIMDUTF_LASX_INTRINSICS_H -#define SIMDUTF_LASX_INTRINSICS_H +/* begin file src/simdutf/lsx/intrinsics.h */ +#ifndef SIMDUTF_LSX_INTRINSICS_H +#define SIMDUTF_LSX_INTRINSICS_H // This should be the correct header whether // you use visual studio or other compilers. #include -#include - -#if defined(__loongarch_asx) - #ifdef __clang__ - #define VREGS_PREFIX "$vr" - #define XREGS_PREFIX "$xr" - #else // GCC - #define VREGS_PREFIX "$f" - #define XREGS_PREFIX "$f" - #endif - #define __ALL_REGS \ - "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26," \ - "27,28,29,30,31" -// Convert __m128i to __m256i -static inline __m256i ____m256i(__m128i in) { - __m256i out = __lasx_xvldi(0); - __asm__ volatile(".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " XREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[in], " VREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x0 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - : [out] "+f"(out) - : [in] "f"(in)); - return out; -} -// Convert two __m128i to __m256i -static inline __m256i lasx_set_q(__m128i inhi, __m128i inlo) { - __m256i out; - __asm__ volatile(".irp i," __ALL_REGS "\n\t" - " .ifc %[hi], " VREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[lo], " VREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x20 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - ".ifnc %[out], %[hi] \n\t" - ".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " XREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[hi], " VREGS_PREFIX "\\j \n\t" - " xvori.b $xr\\i, $xr\\j, 0 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - ".endif \n\t" - : [out] "=f"(out), [hi] "+f"(inhi) - : [lo] "f"(inlo)); - return out; -} -// Convert __m256i low part to __m128i -static inline __m128i lasx_extracti128_lo(__m256i in) { - __m128i out; - __asm__ volatile(".ifnc %[out], %[in] \n\t" - ".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " VREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[in], " XREGS_PREFIX "\\j \n\t" - " vori.b $vr\\i, $vr\\j, 0 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - ".endif \n\t" - : [out] "=f"(out) - : [in] "f"(in)); - return out; -} -// Convert __m256i high part to __m128i -static inline __m128i lasx_extracti128_hi(__m256i in) { - __m128i out; - __asm__ volatile(".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " VREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[in], " XREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x11 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - : [out] "=f"(out) - : [in] "f"(in)); - return out; -} -#endif /* Encoding of argument for LoongArch64 xvldi instruction. See: @@ -11570,7 +10865,7 @@ all lanes the result as 64-bit elements to all lanes */ -namespace lasx_vldi { +namespace vldi { template class const_u16 { constexpr static const uint8_t b0 = ((v >> 0 * 8) & 0xff); @@ -11687,8 +10982,7 @@ template class const_u64 { constexpr static int value = int((operation << 8) | byte) - 8192; constexpr static bool valid = operation != 0xffff; }; - -} // namespace lasx_vldi +} // namespace vldi // Uncomment when running under QEMU affected // by bug https://gitlab.com/qemu-project/qemu/-/issues/2865 @@ -11697,46 +10991,48 @@ template class const_u64 { // #define QEMU_VLDI_BUG 1 #endif -#ifdef QEMU_VLDI_BUG - #define lasx_splat_u16(v) __lasx_xvreplgr2vr_h(v) - #define lasx_splat_u32(v) __lasx_xvreplgr2vr_w(v) -#else -template constexpr __m256i lasx_splat_u16_aux() { - constexpr bool is_imm10 = (int16_t(x) < 512) && (int16_t(x) > -512); - constexpr uint16_t imm10 = is_imm10 ? x : 0; - constexpr bool is_vldi = lasx_vldi::const_u16::valid; - constexpr int vldi_imm = is_vldi ? lasx_vldi::const_u16::value : 0; - - return is_imm10 ? __lasx_xvrepli_h(int16_t(imm10)) - : is_vldi ? __lasx_xvldi(vldi_imm) - : __lasx_xvreplgr2vr_h(x); +#ifndef lsx_splat_u16 + #ifdef QEMU_VLDI_BUG + #define lsx_splat_u16(v) __lsx_vreplgr2vr_h(v) + #define lsx_splat_u32(v) __lsx_vreplgr2vr_w(v) + #else +namespace { +template constexpr __m128i lsx_splat_u16_aux() { + return ((int16_t(x) < 512) && (int16_t(x) > -512)) + ? __lsx_vrepli_h( + ((int16_t(x) < 512) && (int16_t(x) > -512)) ? int16_t(x) : 0) + : (vldi::const_u16::valid + ? __lsx_vldi(vldi::const_u16::valid + ? vldi::const_u16::value + : 0) + : __lsx_vreplgr2vr_h(x)); } -template constexpr __m256i lasx_splat_u32_aux() { - constexpr bool is_imm10 = (int32_t(x) < 512) && (int32_t(x) > -512); - constexpr uint32_t imm10 = is_imm10 ? x : 0; - constexpr bool is_vldi = lasx_vldi::const_u32::valid; - constexpr int vldi_imm = is_vldi ? lasx_vldi::const_u32::value : 0; - - return is_imm10 ? __lasx_xvrepli_w(int32_t(imm10)) - : is_vldi ? __lasx_xvldi(vldi_imm) - : __lasx_xvreplgr2vr_w(x); +template constexpr __m128i lsx_splat_u32_aux() { + return ((int32_t(x) < 512) && (int32_t(x) > -512)) + ? __lsx_vrepli_w( + ((int32_t(x) < 512) && (int32_t(x) > -512)) ? int32_t(x) : 0) + : (vldi::const_u32::valid + ? __lsx_vldi(vldi::const_u32::valid + ? vldi::const_u32::value + : 0) + : __lsx_vreplgr2vr_w(x)); } - - #define lasx_splat_u16(v) lasx_splat_u16_aux<(v)>() - #define lasx_splat_u32(v) lasx_splat_u32_aux<(v)>() -#endif // QEMU_VLDI_BUG - -#endif // SIMDUTF_LASX_INTRINSICS_H -/* end file src/simdutf/lasx/intrinsics.h */ -/* begin file src/simdutf/lasx/bitmanipulation.h */ -#ifndef SIMDUTF_LASX_BITMANIPULATION_H -#define SIMDUTF_LASX_BITMANIPULATION_H +} // namespace + #define lsx_splat_u16(v) lsx_splat_u16_aux<(v)>() + #define lsx_splat_u32(v) lsx_splat_u32_aux<(v)>() + #endif // QEMU_VLDI_BUG +#endif // lsx_splat_u16 +#endif // SIMDUTF_LSX_INTRINSICS_H +/* end file src/simdutf/lsx/intrinsics.h */ +/* begin file src/simdutf/lsx/bitmanipulation.h */ +#ifndef SIMDUTF_LSX_BITMANIPULATION_H +#define SIMDUTF_LSX_BITMANIPULATION_H #include namespace simdutf { -namespace lasx { +namespace lsx { namespace { simdutf_really_inline int count_ones(uint64_t input_num) { @@ -11750,349 +11046,171 @@ simdutf_really_inline int trailing_zeroes(uint64_t input_num) { #endif } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf -#endif // SIMDUTF_LASX_BITMANIPULATION_H -/* end file src/simdutf/lasx/bitmanipulation.h */ -/* begin file src/simdutf/lasx/simd.h */ -#ifndef SIMDUTF_LASX_SIMD_H -#define SIMDUTF_LASX_SIMD_H +#endif // SIMDUTF_LSX_BITMANIPULATION_H +/* end file src/simdutf/lsx/bitmanipulation.h */ +/* begin file src/simdutf/lsx/simd.h */ +#ifndef SIMDUTF_LSX_SIMD_H +#define SIMDUTF_LSX_SIMD_H namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace simd { -__attribute__((aligned(32))) static const uint8_t prev_shuf_table[32][32] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, - {0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, - {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, - {0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, - {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, - 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, - 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0}, - {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0}, - {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0}, - {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0}, - {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0}, - {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0}, - {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0}, - {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, -}; - -__attribute__((aligned(32))) static const uint8_t bitsel_mask_table[32][32] = { - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0}}; - -// Forward-declared so they can be used by splat and friends. -template struct base { - __m256i value; - - // Zero constructor - simdutf_really_inline base() : value{__m256i()} {} - - // Conversion from SIMD register - simdutf_really_inline base(const __m256i _value) : value(_value) {} - // Conversion to SIMD register - simdutf_really_inline operator const __m256i &() const { return this->value; } - simdutf_really_inline operator __m256i &() { return this->value; } - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - if (big_endian) { - __m256i zero = __lasx_xvldi(0); - __m256i in8 = __lasx_xvpermi_d(this->value, 0b11011000); - __m256i inlow = __lasx_xvilvl_b(in8, zero); - __m256i inhigh = __lasx_xvilvh_b(in8, zero); - __lasx_xvst(inlow, reinterpret_cast(ptr), 0); - __lasx_xvst(inhigh, reinterpret_cast(ptr), 32); - } else { - __m256i inlow = __lasx_vext2xv_hu_bu(this->value); - __m256i inhigh = __lasx_vext2xv_hu_bu( - __lasx_xvpermi_q(this->value, this->value, 0b00000001)); - __lasx_xvst(inlow, reinterpret_cast<__m256i *>(ptr), 0); - __lasx_xvst(inhigh, reinterpret_cast<__m256i *>(ptr), 32); - } - } - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - __m256i in32_0 = __lasx_vext2xv_wu_bu(this->value); - __lasx_xvst(in32_0, reinterpret_cast(ptr), 0); +template struct simd8; - __m256i in8_1 = __lasx_xvpermi_d(this->value, 0b00000001); - __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); - __lasx_xvst(in32_1, reinterpret_cast(ptr), 32); +// +// Base class of simd8 and simd8, both of which use __m128i +// internally. +// +template > struct base_u8 { + __m128i value; + static const int SIZE = sizeof(value); - __m256i in8_2 = __lasx_xvpermi_d(this->value, 0b00000010); - __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); - __lasx_xvst(in32_2, reinterpret_cast(ptr), 64); + // Conversion from/to SIMD register + simdutf_really_inline base_u8(const __m128i _value) : value(_value) {} + simdutf_really_inline operator const __m128i &() const { return this->value; } + simdutf_really_inline operator __m128i &() { return this->value; } - __m256i in8_3 = __lasx_xvpermi_d(this->value, 0b00000011); - __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); - __lasx_xvst(in32_3, reinterpret_cast(ptr), 96); - } // Bit operations - simdutf_really_inline Child operator|(const Child other) const { - return __lasx_xvor_v(this->value, other); + simdutf_really_inline simd8 operator|(const simd8 other) const { + return __lsx_vor_v(this->value, other); } - simdutf_really_inline Child operator&(const Child other) const { - return __lasx_xvand_v(this->value, other); + simdutf_really_inline simd8 operator&(const simd8 other) const { + return __lsx_vand_v(this->value, other); } - simdutf_really_inline Child operator^(const Child other) const { - return __lasx_xvxor_v(this->value, other); + simdutf_really_inline simd8 operator^(const simd8 other) const { + return __lsx_vxor_v(this->value, other); } - simdutf_really_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdutf_really_inline simd8 &operator|=(const simd8 other) { + auto this_cast = static_cast *>(this); *this_cast = *this_cast | other; return *this_cast; } -}; - -template struct simd8; -template > -struct base8 : base> { - simdutf_really_inline base8() : base>() {} - simdutf_really_inline base8(const __m256i _value) : base>(_value) {} friend simdutf_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { - return __lasx_xvseq_b(lhs, rhs); + return __lsx_vseq_b(lhs, rhs); } - static const int SIZE = sizeof(base::value); - - template + template simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - static_assert(N <= 16, "unsupported shift value"); - - if (!N) - return this->value; - - __m256i zero = __lasx_xvldi(0); - __m256i result, shuf; - if (N < 16) { - shuf = __lasx_xvld(prev_shuf_table[N], 0); - - result = __lasx_xvshuf_b( - __lasx_xvpermi_q(this->value, this->value, 0b00000001), this->value, - shuf); - __m256i srl_prev = __lasx_xvbsrl_v( - __lasx_xvpermi_q(zero, prev_chunk.value, 0b00110001), (16 - N)); - __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); - result = __lasx_xvbitsel_v(result, srl_prev, mask); - - return result; - } else if (N == 16) { - return __lasx_xvpermi_q(this->value, prev_chunk.value, 0b00100001); - } + return __lsx_vor_v(__lsx_vbsll_v(this->value, N), + __lsx_vbsrl_v(prev_chunk.value, 16 - N)); } }; // SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { +template <> struct simd8 : base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + static simdutf_really_inline simd8 splat(bool _value) { - return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); } - simdutf_really_inline simd8() : base8() {} - simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} + simdutf_really_inline simd8(const __m128i _value) : base_u8(_value) {} + // False constructor + simdutf_really_inline simd8() : simd8(__lsx_vldi(0)) {} // Splat constructor - simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} + simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} + simdutf_really_inline void store(uint8_t dst[16]) const { + return __lsx_vst(this->value, dst, 0); + } simdutf_really_inline uint32_t to_bitmask() const { - __m256i mask = __lasx_xvmsknz_b(this->value); - uint32_t mask0 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t mask1 = __lasx_xvpickve2gr_wu(mask, 4); - return (mask0 | (mask1 << 16)); - } - simdutf_really_inline bool any() const { - if (__lasx_xbz_b(this->value)) - return false; - return true; + return __lsx_vpickve2gr_wu(__lsx_vmsknz_b(*this), 0); } - simdutf_really_inline simd8 operator~() const { return *this ^ true; } }; -template struct base8_numeric : base8 { - static simdutf_really_inline simd8 splat(T _value) { - return __lasx_xvreplgr2vr_b(_value); +// Unsigned bytes +template <> struct simd8 : base_u8 { + static simdutf_really_inline simd8 splat(uint8_t _value) { + return __lsx_vreplgr2vr_b(_value); } - static simdutf_really_inline simd8 zero() { return __lasx_xvldi(0); } - static simdutf_really_inline simd8 load(const T values[32]) { - return __lasx_xvld(reinterpret_cast(values), 0); + static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } + static simdutf_really_inline simd8 load(const uint8_t *values) { + return __lsx_vld(values, 0); } + simdutf_really_inline simd8(const __m128i _value) + : base_u8(_value) {} + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i)v16u8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15); + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); } - simdutf_really_inline base8_numeric() : base8() {} - simdutf_really_inline base8_numeric(const __m256i _value) - : base8(_value) {} - // Store to array - simdutf_really_inline void store(T dst[32]) const { - return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); + simdutf_really_inline void store(uint8_t dst[16]) const { + return __lsx_vst(this->value, dst, 0); } - // Override to distinguish from bool version - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + // Order-specific operations + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return __lsx_vsle_bu(other, *this); + } + simdutf_really_inline simd8 + operator>(const simd8 other) const { + return __lsx_vslt_bu(other, *this); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + value = __lsx_vsub_b(value, other.value); + return *this; + } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true + // = nonzero. For ARM, returns all 1's. + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return simd8(*this > other); + } + + // Bit-specific operations + simdutf_really_inline simd8 any_bits_set(simd8 bits) const { + return __lsx_vslt_bu(__lsx_vldi(0), __lsx_vand_v(this->value, bits)); + } + simdutf_really_inline bool is_ascii() const { + return __lsx_vpickve2gr_hu(__lsx_vmskgez_b(this->value), 0) == 0xFFFF; + } + + simdutf_really_inline bool any_bits_set_anywhere() const { + return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(this->value), 0) > 0; + } + template simdutf_really_inline simd8 shr() const { + return __lsx_vsrli_b(this->value, N); + } + template simdutf_really_inline simd8 shl() const { + return __lsx_vslli_b(this->value, N); + } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior // for out of range values) template simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - __m256i origin = __lasx_xvand_v(this->value, __lasx_xvldi(0x1f)); - return __lasx_xvshuf_b(__lasx_xvldi(0), lookup_table, origin); + return lookup_table.apply_lookup_16_to(*this); } template @@ -12106,115 +11224,150 @@ template struct base8_numeric : base8 { replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15)); } + + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); + return __lsx_vshuf_b(__lsx_vldi(0), *this, simd8(original_tmp)); + } + + simdutf_really_inline uint64_t sum_bytes() const { + const auto sum_u16 = __lsx_vhaddw_hu_bu(value, value); + const auto sum_u32 = __lsx_vhaddw_wu_hu(sum_u16, sum_u16); + const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); + + return uint64_t(__lsx_vpickve2gr_du(sum_u64, 0)) + + uint64_t(__lsx_vpickve2gr_du(sum_u64, 1)); + } }; // Signed bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m256i _value) - : base8_numeric(_value) {} +template <> struct simd8 { + __m128i value; - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} - simdutf_really_inline operator simd8() const; - simdutf_really_inline bool is_ascii() const { - __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); - if (__lasx_xbnz_v(ascii_mask)) - return false; - return true; - } - // Order-sensitive comparisons - simdutf_really_inline simd8 operator>(const simd8 other) const { - return __lasx_xvslt_b(other, this->value); + static const int SIZE = sizeof(value); + + static simdutf_really_inline simd8 splat(int8_t _value) { + return __lsx_vreplgr2vr_b(_value); } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return __lasx_xvslt_b(this->value, other); + static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } + static simdutf_really_inline simd8 load(const int8_t values[16]) { + return __lsx_vld(values, 0); } -}; -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m256i _value) - : base8_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, - uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, - uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, - uint8_t v31) - : simd8((__m256i)v32u8{v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, v15, - v16, v17, v18, v19, v20, v21, v22, v23, - v24, v25, v26, v27, v28, v29, v30, v31}) {} + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { + __m128i zero = __lsx_vldi(0); + if simdutf_constexpr (match_system(big_endian)) { + __lsx_vst(__lsx_vilvl_b(zero, (__m128i)this->value), + reinterpret_cast(p), 0); + __lsx_vst(__lsx_vilvh_b(zero, (__m128i)this->value), + reinterpret_cast(p + 8), 0); + } else { + __lsx_vst(__lsx_vilvl_b((__m128i)this->value, zero), + reinterpret_cast(p), 0); + __lsx_vst(__lsx_vilvh_b((__m128i)this->value, zero), + reinterpret_cast(p + 8), 0); + } + } - // Saturated math - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return __lasx_xvssub_bu(this->value, other); + simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { + __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, (__m128i)this->value); + __m128i in16high = __lsx_vilvh_b(zero, (__m128i)this->value); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); + __lsx_vst(in32_0, reinterpret_cast(p), 0); + __lsx_vst(in32_1, reinterpret_cast(p + 4), 0); + __lsx_vst(in32_2, reinterpret_cast(p + 8), 0); + __lsx_vst(in32_3, reinterpret_cast(p + 12), 0); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); + // In places where the table can be reused, which is most uses in simdutf, it + // is worth it to do 4 table lookups, as there is no direct zero extension + // from u8 to u32. + simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { + const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, + 2, 255, 255, 255, 3, 255, 255, 255}; + const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, + 6, 255, 255, 255, 7, 255, 255, 255}; + const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, + 10, 255, 255, 255, 11, 255, 255, 255}; + const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, + 14, 255, 255, 255, 15, 255, 255, 255}; + + // encourage store pairing and interleaving + const auto shuf1 = this->apply_lookup_16_to(tb1); + const auto shuf2 = this->apply_lookup_16_to(tb2); + shuf1.store(reinterpret_cast(p)); + shuf2.store(reinterpret_cast(p + 4)); + + const auto shuf3 = this->apply_lookup_16_to(tb3); + const auto shuf4 = this->apply_lookup_16_to(tb4); + shuf3.store(reinterpret_cast(p + 8)); + shuf4.store(reinterpret_cast(p + 12)); } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return __lasx_xvsle_bu(other, *this); + // Conversion from/to SIMD register + simdutf_really_inline simd8(const __m128i _value) : value(_value) {} + + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + + // Store to array + simdutf_really_inline void store(int8_t dst[16]) const { + return __lsx_vst(value, dst, 0); } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - value = __lasx_xvsub_b(value, other.value); - return *this; + + simdutf_really_inline operator simd8() const { + return ((__m128i)this->value); } - // Bit-specific operations - simdutf_really_inline bool is_ascii() const { - __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); - if (__lasx_xbnz_v(ascii_mask)) - return false; - return true; + simdutf_really_inline simd8 + operator|(const simd8 other) const { + return __lsx_vor_v((__m128i)value, (__m128i)other.value); } - simdutf_really_inline bool any_bits_set_anywhere() const { - if (__lasx_xbnz_v(this->value)) - return true; - return false; + + simdutf_really_inline bool is_ascii() const { + return (__lsx_vpickve2gr_hu(__lsx_vmskgez_b((__m128i)this->value), 0) == + 0xffff); } - template simdutf_really_inline simd8 shr() const { - return __lasx_xvsrli_b(this->value, N); + + // Order-sensitive comparisons + simdutf_really_inline simd8 operator>(const simd8 other) const { + return __lsx_vslt_b((__m128i)other.value, (__m128i)value); } - template simdutf_really_inline simd8 shl() const { - return __lasx_xvslli_b(this->value, N); + simdutf_really_inline simd8 operator<(const simd8 other) const { + return __lsx_vslt_b((__m128i)value, (__m128i)other.value); } - simdutf_really_inline uint64_t sum_bytes() const { - const auto sum_u16 = __lasx_xvhaddw_hu_bu(value, value); - const auto sum_u32 = __lasx_xvhaddw_wu_hu(sum_u16, sum_u16); - const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); + template + simdutf_really_inline simd8 + prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(this->value, N), + __lsx_vbsrl_v(prev_chunk.value, 16 - N)); + } - return uint64_t(__lasx_xvpickve2gr_du(sum_u64, 0)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 1)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 2)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 3)); + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); + return __lsx_vshuf_b(__lsx_vldi(0), (__m128i)this->value, + simd8(original_tmp)); } }; -simdutf_really_inline simd8::operator simd8() const { - return this->value; -} template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 2, - "LASX kernel should use two registers per 64-byte block."); + static_assert( + NUM_CHUNKS == 4, + "LoongArch kernel should use four registers per 64-byte block."); simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64 &o) = delete; // no copy allowed @@ -12222,36 +11375,36 @@ template struct simd8x64 { operator=(const simd8 other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) - : chunks{chunk0, chunk1} {} + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} simdutf_really_inline simd8x64(const T *ptr) : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} + simd8::load(ptr + sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} simdutf_really_inline void store(T *ptr) const { this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); + this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); } simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { this->chunks[0] |= other.chunks[0]; this->chunks[1] |= other.chunks[1]; + this->chunks[2] |= other.chunks[2]; + this->chunks[3] |= other.chunks[3]; return *this; } simdutf_really_inline simd8 reduce_or() const { - return this->chunks[0] | this->chunks[1]; + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); } - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } + simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } template simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { @@ -12259,117 +11412,152 @@ template struct simd8x64 { sizeof(simd8) * 0); this->chunks[1].template store_ascii_as_utf16(ptr + sizeof(simd8) * 1); + this->chunks[2].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 2); + this->chunks[3].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 3); } simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); + this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); + this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); + this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[3]), 6); + mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[2]), 4)); + mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[1]), 2)); + mask = __lsx_vor_v(mask, __lsx_vmsknz_b(this->chunks[0])); + return __lsx_vpickve2gr_du(mask, 0); } simdutf_really_inline uint64_t lt(const T m) const { const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) .to_bitmask(); } - simdutf_really_inline uint64_t gt(const T m) const { const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) .to_bitmask(); } simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { const simd8 mask = simd8::splat(m); - return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), - (simd8(__m256i(this->chunks[1])) >= mask)) + return simd8x64(simd8(this->chunks[0].value) >= mask, + simd8(this->chunks[1].value) >= mask, + simd8(this->chunks[2].value) >= mask, + simd8(this->chunks[3].value) >= mask) .to_bitmask(); } }; // struct simd8x64 -/* begin file src/simdutf/lasx/simd16-inl.h */ +/* begin file src/simdutf/lsx/simd16-inl.h */ template struct simd16; +template > struct base_u16 { + __m128i value; + static const size_t SIZE = sizeof(value); + static const size_t ELEMENTS = sizeof(value) / sizeof(T); + + // Conversion from/to SIMD register + simdutf_really_inline base_u16() = default; + simdutf_really_inline base_u16(const __m128i _value) : value(_value) {} + // Bit operations + simdutf_really_inline simd16 operator|(const simd16 other) const { + return __lsx_vor_v(this->value, other.value); + } + simdutf_really_inline simd16 operator&(const simd16 other) const { + return __lsx_vand_v(this->value, other.value); + } + simdutf_really_inline simd16 operator~() const { + return __lsx_vxori_b(this->value, 0xFF); + } + + friend simdutf_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return __lsx_vseq_h(lhs.value, rhs.value); + } + + template + simdutf_really_inline simd16 byte_right_shift() const { + return __lsx_vbsrl_v(this->value, N); + } + + simdutf_really_inline uint16_t first() const { + return uint16_t(__lsx_vpickve2gr_w(value, 0)); + } +}; + template > -struct base16 : base> { - using bitmask_type = uint32_t; +struct base16 : base_u16 { + using bitmask_type = uint16_t; - simdutf_really_inline base16() : base>() {} - simdutf_really_inline base16(const __m256i _value) - : base>(_value) {} + simdutf_really_inline base16() : base_u16() {} + simdutf_really_inline base16(const __m128i _value) : base_u16(_value) {} template simdutf_really_inline base16(const Pointer *ptr) - : base16(__lasx_xvld(reinterpret_cast(ptr), 0)) {} + : base16(__lsx_vld(ptr, 0)) {} - /// the size of vector in bytes - static const int SIZE = sizeof(base>::value); + static const int SIZE = sizeof(base_u16::value); - /// the number of elements of type T a vector can hold - static const int ELEMENTS = SIZE / sizeof(T); + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), + __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); + } }; // SIMD byte mask type (returned by things like eq and gt) template <> struct simd16 : base16 { static simdutf_really_inline simd16 splat(bool _value) { - return __lasx_xvreplgr2vr_h(uint16_t(-(!!_value))); + return __lsx_vreplgr2vr_h(uint16_t(-(!!_value))); } simdutf_really_inline simd16() : base16() {} - simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} + simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} simdutf_really_inline bitmask_type to_bitmask() const { - __m256i mask = __lasx_xvmsknz_b(this->value); - bitmask_type mask0 = __lasx_xvpickve2gr_wu(mask, 0); - bitmask_type mask1 = __lasx_xvpickve2gr_wu(mask, 4); - return (mask0 | (mask1 << 16)); - } - simdutf_really_inline simd16 operator~() const { return *this ^ true; } - - simdutf_really_inline bool is_zero() const { - return __lasx_xbz_v(this->value); - } - - template simdutf_really_inline simd16 byte_right_shift() const { - const auto t0 = __lasx_xvbsrl_v(this->value, N); - const auto t1 = __lasx_xvpermi_q(this->value, __lasx_xvldi(0), 0b00000011); - const auto t2 = __lasx_xvbsll_v(t1, 16 - N); - const auto t3 = __lasx_xvor_v(t0, t2); - return t3; + __m128i mask = __lsx_vmsknz_b(this->value); + bitmask_type mask0 = bitmask_type(__lsx_vpickve2gr_wu(mask, 0)); + return mask0; } - simdutf_really_inline uint16_t first() const { - return uint16_t(__lasx_xvpickve2gr_w(value, 0)); - } + simdutf_really_inline bool is_zero() const { return __lsx_bz_v(this->value); } }; template struct base16_numeric : base16 { static simdutf_really_inline simd16 splat(T _value) { - return __lasx_xvreplgr2vr_h((uint16_t)_value); + return __lsx_vreplgr2vr_h(_value); } - static simdutf_really_inline simd16 zero() { return __lasx_xvldi(0); } + static simdutf_really_inline simd16 zero() { return __lsx_vldi(0); } + template static simdutf_really_inline simd16 load(const Pointer values) { - return __lasx_xvld(values, 0); + return __lsx_vld(values, 0); } - simdutf_really_inline base16_numeric() : base16() {} - simdutf_really_inline base16_numeric(const __m256i _value) + simdutf_really_inline base16_numeric(const __m128i _value) : base16(_value) {} // Store to array simdutf_really_inline void store(T dst[8]) const { - return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); + return __lsx_vst(this->value, dst, 0); } // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } + simdutf_really_inline simd16 operator~() const { + return __lsx_vxori_b(this->value, 0xFF); + } }; -// Unsigned code units +// Unsigned code unitstemplate<> template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m256i _value) - : base16_numeric(_value) {} + simdutf_really_inline simd16(const __m128i _value) + : base16_numeric((__m128i)_value) {} // Splat constructor simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} @@ -12379,71 +11567,67 @@ template <> struct simd16 : base16_numeric { simdutf_really_inline simd16(const char16_t *values) : simd16(load(reinterpret_cast(values))) {} + // Copy constructor + simdutf_really_inline simd16(const simd16 mask) : simd16(mask.value) {} + // Order-specific operations simdutf_really_inline simd16 &operator+=(const simd16 other) { - value = __lasx_xvadd_h(value, other.value); + value = __lsx_vadd_h(value, other.value); return *this; } - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - return __lasx_xvshuf4i_b(this->value, 0b10110001); - } - template static simdutf_really_inline simd8 pack_shifted_right(const simd16 &v0, const simd16 &v1) { - return __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(v1.value, v0.value, N), - 0b11011000); + return __lsx_vssrlni_bu_h(v1.value, v0.value, N); } // Pack with the unsigned saturation of two uint16_t code units into single // uint8_t vector static simdutf_really_inline simd8 pack(const simd16 &v0, const simd16 &v1) { - return pack_shifted_right<0>(v0, v1); } - simdutf_really_inline uint64_t sum() const { - const auto sum_u32 = __lasx_xvhaddw_wu_hu(value, value); - const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); - - return uint64_t(__lasx_xvpickve2gr_du(sum_u64, 0)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 1)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 2)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 3)); + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return __lsx_vshuf4i_b(this->value, 0b10110001); } - template simdutf_really_inline simd16 byte_right_shift() const { - return __lasx_xvbsrl_v(this->value, N); + simdutf_really_inline uint64_t sum() const { + const auto sum_u32 = __lsx_vhaddw_wu_hu(value, value); + const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); + + return uint64_t(__lsx_vpickve2gr_du(sum_u64, 0)) + + uint64_t(__lsx_vpickve2gr_du(sum_u64, 1)); } }; simdutf_really_inline simd16 operator<(const simd16 a, const simd16 b) { - return __lasx_xvslt_hu(a.value, b.value); + return __lsx_vslt_hu(a.value, b.value); } simdutf_really_inline simd16 operator>(const simd16 a, const simd16 b) { - return __lasx_xvslt_hu(b.value, a.value); + return __lsx_vslt_hu(b.value, a.value); } simdutf_really_inline simd16 operator<=(const simd16 a, const simd16 b) { - return __lasx_xvsle_hu(a.value, b.value); + return __lsx_vsle_hu(a.value, b.value); } simdutf_really_inline simd16 operator>=(const simd16 a, const simd16 b) { - return __lasx_xvsle_hu(b.value, a.value); + return __lsx_vsle_hu(b.value, a.value); } template struct simd16x32 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert(NUM_CHUNKS == 2, - "LASX kernel should use two registers per 64-byte block."); + static_assert( + NUM_CHUNKS == 4, + "LOONGARCH kernel should use four registers per 64-byte block."); simd16 chunks[NUM_CHUNKS]; simd16x32(const simd16x32 &o) = delete; // no copy allowed @@ -12451,139 +11635,127 @@ template struct simd16x32 { operator=(const simd16 other) = delete; // no assignment allowed simd16x32() = delete; // no default constructor allowed - simdutf_really_inline simd16x32(const simd16 chunk0, - const simd16 chunk1) - : chunks{chunk0, chunk1} {} + simdutf_really_inline + simd16x32(const simd16 chunk0, const simd16 chunk1, + const simd16 chunk2, const simd16 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} simdutf_really_inline simd16x32(const T *ptr) : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} + simd16::load(ptr + sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} simdutf_really_inline void store(T *ptr) const { this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); } simdutf_really_inline void swap_bytes() { this->chunks[0] = this->chunks[0].swap_bytes(); this->chunks[1] = this->chunks[1].swap_bytes(); + this->chunks[2] = this->chunks[2].swap_bytes(); + this->chunks[3] = this->chunks[3].swap_bytes(); } simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } simdutf_really_inline uint64_t lteq(const T m) const { const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask) + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) .to_bitmask(); } }; // struct simd16x32 -simdutf_really_inline simd16 min(const simd16 a, - const simd16 b) { - return __lasx_xvmin_hu(a.value, b.value); -} - -simdutf_really_inline simd16 operator==(const simd16 a, - uint16_t b) { - const auto bv = __lasx_xvreplgr2vr_h(b); - return __lasx_xvseq_h(a.value, bv); -} - -simdutf_really_inline simd16 as_vector_u16(const simd16 x) { - return x.value; -} - -simdutf_really_inline simd16 operator&(const simd16 a, +simdutf_really_inline simd16 operator^(const simd16 a, uint16_t b) { - const auto bv = __lasx_xvreplgr2vr_h(b); - return __lasx_xvand_v(a.value, bv); + const auto bv = __lsx_vreplgr2vr_h(b); + return __lsx_vxor_v(a.value, bv); } -simdutf_really_inline simd16 operator&(const simd16 a, - const simd16 b) { - return __lasx_xvand_v(a.value, b.value); +simdutf_really_inline simd16 operator^(const simd16 a, + const simd16 b) { + return __lsx_vxor_v(a.value, b.value); } -simdutf_really_inline simd16 operator^(const simd16 a, - uint16_t b) { - const auto bv = __lasx_xvreplgr2vr_h(b); - return __lasx_xvxor_v(a.value, bv); +simdutf_really_inline simd16 min(const simd16 a, + const simd16 b) { + return __lsx_vmin_hu(a.value, b.value); } -simdutf_really_inline simd16 operator^(const simd16 a, - const simd16 b) { - return __lasx_xvxor_v(a.value, b.value); +simdutf_really_inline simd16 as_vector_u16(const simd16 x) { + return x.value; } -/* end file src/simdutf/lasx/simd16-inl.h */ -/* begin file src/simdutf/lasx/simd32-inl.h */ +/* end file src/simdutf/lsx/simd16-inl.h */ +/* begin file src/simdutf/lsx/simd32-inl.h */ template struct simd32; template <> struct simd32 { - __m256i value; + __m128i value; static const int SIZE = sizeof(value); static const int ELEMENTS = SIZE / sizeof(uint32_t); // constructors - simdutf_really_inline simd32(__m256i v) : value(v) {} + simdutf_really_inline simd32(__m128i v) : value(v) {} template - simdutf_really_inline simd32(Ptr *ptr) : value(__lasx_xvld(ptr, 0)) {} + simdutf_really_inline simd32(Ptr *ptr) : value(__lsx_vld(ptr, 0)) {} // in-place operators simdutf_really_inline simd32 &operator-=(const simd32 other) { - value = __lasx_xvsub_w(value, other.value); + value = __lsx_vsub_w(value, other.value); return *this; } // members simdutf_really_inline uint64_t sum() const { - const auto odd = __lasx_xvsrli_d(value, 32); - const auto even = __lasx_xvand_v(value, __lasx_xvreplgr2vr_d(0xffffffff)); - - const auto sum64 = __lasx_xvadd_d(odd, even); - - return uint64_t(__lasx_xvpickve2gr_du(sum64, 0)) + - uint64_t(__lasx_xvpickve2gr_du(sum64, 1)) + - uint64_t(__lasx_xvpickve2gr_du(sum64, 2)) + - uint64_t(__lasx_xvpickve2gr_du(sum64, 3)); + return uint64_t(__lsx_vpickve2gr_wu(value, 0)) + + uint64_t(__lsx_vpickve2gr_wu(value, 1)) + + uint64_t(__lsx_vpickve2gr_wu(value, 2)) + + uint64_t(__lsx_vpickve2gr_wu(value, 3)); } // static members static simdutf_really_inline simd32 splat(uint32_t x) { - return __lasx_xvreplgr2vr_w(x); + return __lsx_vreplgr2vr_w(x); } static simdutf_really_inline simd32 zero() { - return __lasx_xvrepli_w(0); + return __lsx_vrepli_w(0); } }; // ------------------------------------------------------------ template <> struct simd32 { - __m256i value; + __m128i value; static const int SIZE = sizeof(value); // constructors - simdutf_really_inline simd32(__m256i v) : value(v) {} + simdutf_really_inline simd32(__m128i v) : value(v) {} }; // ------------------------------------------------------------ simdutf_really_inline simd32 operator&(const simd32 a, const simd32 b) { - return __lasx_xvor_v(a.value, b.value); + return __lsx_vor_v(a.value, b.value); } simdutf_really_inline simd32 operator<(const simd32 a, const simd32 b) { - return __lasx_xvslt_wu(a.value, b.value); + return __lsx_vslt_wu(a.value, b.value); } simdutf_really_inline simd32 operator>(const simd32 a, const simd32 b) { - return __lasx_xvslt_wu(b.value, a.value); + return __lsx_vslt_wu(b.value, a.value); } // ------------------------------------------------------------ @@ -12591,78 +11763,76 @@ simdutf_really_inline simd32 operator>(const simd32 a, simdutf_really_inline simd32 as_vector_u32(const simd32 v) { return v.value; } -/* end file src/simdutf/lasx/simd32-inl.h */ -/* begin file src/simdutf/lasx/simd64-inl.h */ +/* end file src/simdutf/lsx/simd32-inl.h */ +/* begin file src/simdutf/lsx/simd64-inl.h */ template struct simd64; template <> struct simd64 { - __m256i value; + __m128i value; static const int SIZE = sizeof(value); static const int ELEMENTS = SIZE / sizeof(uint64_t); // constructors - simdutf_really_inline simd64(__m256i v) : value(v) {} + simdutf_really_inline simd64(__m128i v) : value(v) {} template - simdutf_really_inline simd64(Ptr *ptr) : value(__lasx_xvld(ptr, 0)) {} + simdutf_really_inline simd64(Ptr *ptr) : value(__lsx_vld(ptr, 0)) {} // in-place operators simdutf_really_inline simd64 &operator+=(const simd64 other) { - value = __lasx_xvadd_d(value, other.value); + value = __lsx_vadd_d(value, other.value); return *this; } // members simdutf_really_inline uint64_t sum() const { - return uint64_t(__lasx_xvpickve2gr_du(value, 0)) + - uint64_t(__lasx_xvpickve2gr_du(value, 1)) + - uint64_t(__lasx_xvpickve2gr_du(value, 2)) + - uint64_t(__lasx_xvpickve2gr_du(value, 3)); + return uint64_t(__lsx_vpickve2gr_du(value, 0)) + + uint64_t(__lsx_vpickve2gr_du(value, 1)); } // static members static simdutf_really_inline simd64 zero() { - return __lasx_xvrepli_d(0); + return __lsx_vrepli_d(0); } }; // ------------------------------------------------------------ template <> struct simd64 { - __m256i value; + __m128i value; static const int SIZE = sizeof(value); // constructors - simdutf_really_inline simd64(__m256i v) : value(v) {} + simdutf_really_inline simd64(__m128i v) : value(v) {} }; // ------------------------------------------------------------ simd64 sum_8bytes(const simd8 v) { - const auto sum_u16 = __lasx_xvhaddw_hu_bu(v, v); - const auto sum_u32 = __lasx_xvhaddw_wu_hu(sum_u16, sum_u16); - const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); + const auto sum_u16 = __lsx_vhaddw_hu_bu(v, v); + const auto sum_u32 = __lsx_vhaddw_wu_hu(sum_u16, sum_u16); + const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); return simd64(sum_u64); } -/* end file src/simdutf/lasx/simd64-inl.h */ +/* end file src/simdutf/lsx/simd64-inl.h */ } // namespace simd } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf -#endif // SIMDUTF_LASX_SIMD_H -/* end file src/simdutf/lasx/simd.h */ +#endif // SIMDUTF_LSX_SIMD_H +/* end file src/simdutf/lsx/simd.h */ -/* begin file src/simdutf/lasx/end.h */ +/* begin file src/simdutf/lsx/end.h */ #undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP -/* end file src/simdutf/lasx/end.h */ +/* end file src/simdutf/lsx/end.h */ -#endif // SIMDUTF_IMPLEMENTATION_LASX +#endif // SIMDUTF_IMPLEMENTATION_LSX -#endif // SIMDUTF_LASX_H -/* end file src/simdutf/lasx.h */ +#endif // SIMDUTF_LSX_H +/* end file src/simdutf/lsx.h */ /* begin file src/simdutf/fallback.h */ #ifndef SIMDUTF_FALLBACK_H #define SIMDUTF_FALLBACK_H @@ -12887,4095 +12057,223 @@ class implementation final : public simdutf::implementation { simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( const char32_t *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( - const char16_t *input, size_t length) const noexcept; - ; - simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( - const char16_t *input, size_t length) const noexcept; - ; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; - size_t binary_to_base64_with_lines(const char *input, size_t length, - char *output, size_t line_length, - base64_options options) const noexcept; - const char *find(const char *start, const char *end, - char character) const noexcept; - const char16_t *find(const char16_t *start, const char16_t *end, - char16_t character) const noexcept; - -#endif // SIMDUTF_FEATURE_BASE64 -}; -} // namespace fallback -} // namespace simdutf - -#endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H -/* end file src/simdutf/fallback/implementation.h */ - -/* begin file src/simdutf/fallback/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "fallback" -// #define SIMDUTF_IMPLEMENTATION fallback -/* end file src/simdutf/fallback/begin.h */ - - // Declarations -/* begin file src/simdutf/fallback/bitmanipulation.h */ -#ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H -#define SIMDUTF_FALLBACK_BITMANIPULATION_H - -#include - -namespace simdutf { -namespace fallback { -namespace {} // unnamed namespace -} // namespace fallback -} // namespace simdutf - -#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H -/* end file src/simdutf/fallback/bitmanipulation.h */ - -/* begin file src/simdutf/fallback/end.h */ -/* end file src/simdutf/fallback/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_FALLBACK -#endif // SIMDUTF_FALLBACK_H -/* end file src/simdutf/fallback.h */ -#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO -SIMDUTF_POP_DISABLE_WARNINGS -#endif - -// The scalar routines should be included once. -/* begin file src/scalar/swap_bytes.h */ -#ifndef SIMDUTF_SWAP_BYTES_H -#define SIMDUTF_SWAP_BYTES_H - -namespace simdutf { -namespace scalar { - -constexpr inline simdutf_warn_unused uint16_t -u16_swap_bytes(const uint16_t word) { - return uint16_t((word >> 8) | (word << 8)); -} - -constexpr inline simdutf_warn_unused uint32_t -u32_swap_bytes(const uint32_t word) { - return ((word >> 24) & 0xff) | // move byte 3 to byte 0 - ((word << 8) & 0xff0000) | // move byte 1 to byte 2 - ((word >> 8) & 0xff00) | // move byte 2 to byte 1 - ((word << 24) & 0xff000000); // byte 0 to byte 3 -} - -namespace utf32 { -template constexpr uint32_t swap_if_needed(uint32_t c) { - return !match_system(big_endian) ? scalar::u32_swap_bytes(c) : c; -} -} // namespace utf32 - -namespace utf16 { -template constexpr uint16_t swap_if_needed(uint16_t c) { - return !match_system(big_endian) ? scalar::u16_swap_bytes(c) : c; -} -} // namespace utf16 - -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/swap_bytes.h */ -#if SIMDUTF_FEATURE_ASCII -/* begin file src/scalar/ascii.h */ -#ifndef SIMDUTF_ASCII_H -#define SIMDUTF_ASCII_H - -namespace simdutf { -namespace scalar { -namespace { -namespace ascii { -#if SIMDUTF_IMPLEMENTATION_FALLBACK -// Only used by the fallback kernel. -inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - // process in blocks of 16 bytes when possible - for (; pos + 16 <= len; pos += 16) { - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) != 0) { - return false; - } - } - // process the tail byte-by-byte - for (; pos < len; pos++) { - if (data[pos] >= 0b10000000) { - return false; - } - } - return true; -} -#endif -inline simdutf_warn_unused result validate_with_errors(const char *buf, - size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - // process in blocks of 16 bytes when possible - for (; pos + 16 <= len; pos += 16) { - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) != 0) { - for (; pos < len; pos++) { - if (data[pos] >= 0b10000000) { - return result(error_code::TOO_LARGE, pos); - } - } - } - } - // process the tail byte-by-byte - for (; pos < len; pos++) { - if (data[pos] >= 0b10000000) { - return result(error_code::TOO_LARGE, pos); - } - } - return result(error_code::SUCCESS, pos); -} - -} // namespace ascii -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/ascii.h */ -#endif // SIMDUTF_FEATURE_ASCII -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/scalar/utf8.h */ -#ifndef SIMDUTF_UTF8_H -#define SIMDUTF_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8 { -#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_RVV -// only used by the fallback kernel. -// credit: based on code from Google Fuchsia (Apache Licensed) -inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - uint32_t code_point = 0; - while (pos < len) { - // check of the next 16 bytes are ascii. - uint64_t next_pos = pos + 16; - if (next_pos <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - pos = next_pos; - continue; - } - } - unsigned char byte = data[pos]; - - while (byte < 0b10000000) { - if (++pos == len) { - return true; - } - byte = data[pos]; - } - - if ((byte & 0b11100000) == 0b11000000) { - next_pos = pos + 2; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if ((code_point < 0x80) || (0x7ff < code_point)) { - return false; - } - } else if ((byte & 0b11110000) == 0b11100000) { - next_pos = pos + 3; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if ((code_point < 0x800) || (0xffff < code_point) || - (0xd7ff < code_point && code_point < 0xe000)) { - return false; - } - } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 - next_pos = pos + 4; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = - (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { - return false; - } - } else { - // we may have a continuation - return false; - } - pos = next_pos; - } - return true; -} -#endif - -inline simdutf_warn_unused result validate_with_errors(const char *buf, - size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - uint32_t code_point = 0; - while (pos < len) { - // check of the next 16 bytes are ascii. - size_t next_pos = pos + 16; - if (next_pos <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - pos = next_pos; - continue; - } - } - unsigned char byte = data[pos]; - - while (byte < 0b10000000) { - if (++pos == len) { - return result(error_code::SUCCESS, len); - } - byte = data[pos]; - } - - if ((byte & 0b11100000) == 0b11000000) { - next_pos = pos + 2; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if ((code_point < 0x80) || (0x7ff < code_point)) { - return result(error_code::OVERLONG, pos); - } - } else if ((byte & 0b11110000) == 0b11100000) { - next_pos = pos + 3; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if ((code_point < 0x800) || (0xffff < code_point)) { - return result(error_code::OVERLONG, pos); - } - if (0xd7ff < code_point && code_point < 0xe000) { - return result(error_code::SURROGATE, pos); - } - } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 - next_pos = pos + 4; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = - (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff) { - return result(error_code::OVERLONG, pos); - } - if (0x10ffff < code_point) { - return result(error_code::TOO_LARGE, pos); - } - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } else { - return result(error_code::HEADER_BITS, pos); - } - } - pos = next_pos; - } - return result(error_code::SUCCESS, len); -} - -// Finds the previous leading byte starting backward from buf and validates with -// errors from there Used to pinpoint the location of an error when an invalid -// chunk is detected We assume that the stream starts with a leading byte, and -// to check that it is the case, we ask that you pass a pointer to the start of -// the stream (start). -inline simdutf_warn_unused result rewind_and_validate_with_errors( - const char *start, const char *buf, size_t len) noexcept { - // First check that we start with a leading byte - if ((*start & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, 0); - } - size_t extra_len{0}; - // A leading byte cannot be further than 4 bytes away - for (int i = 0; i < 5; i++) { - unsigned char byte = *buf; - if ((byte & 0b11000000) != 0b10000000) { - break; - } else { - buf--; - extra_len++; - } - } - - result res = validate_with_errors(buf, len + extra_len); - res.count -= extra_len; - return res; -} - -inline size_t count_code_points(const char *buf, size_t len) { - const int8_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - // -65 is 0b10111111, anything larger in two-complement's should start a new - // code point. - if (p[i] > -65) { - counter++; - } - } - return counter; -} - -inline size_t utf16_length_from_utf8(const char *buf, size_t len) { - const int8_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - if (p[i] > -65) { - counter++; - } - if (uint8_t(p[i]) >= 240) { - counter++; - } - } - return counter; -} - -simdutf_warn_unused inline size_t trim_partial_utf8(const char *input, - size_t length) { - if (length < 3) { - switch (length) { - case 2: - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 2]) >= 0xe0) { - return length - 2; - } // 3- and 4-byte characters with only 2 bytes left - return length; - case 1: - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - return length; - case 0: - return length; - } - } - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 2]) >= 0xe0) { - return length - 2; - } // 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 3]) >= 0xf0) { - return length - 3; - } // 4-byte characters with only 3 bytes left - return length; -} - -} // namespace utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING || \ - (SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1) -/* begin file src/scalar/utf16.h */ -#ifndef SIMDUTF_UTF16_H -#define SIMDUTF_UTF16_H - -namespace simdutf { -namespace scalar { -namespace utf16 { - -template -inline simdutf_warn_unused bool validate_as_ascii(const char16_t *data, - size_t len) noexcept { - for (size_t pos = 0; pos < len; pos++) { - char16_t word = scalar::utf16::swap_if_needed(data[pos]); - if (word >= 0x80) { - return false; - } - } - return true; -} - -template -inline simdutf_warn_unused bool validate(const char16_t *data, - size_t len) noexcept { - uint64_t pos = 0; - while (pos < len) { - char16_t word = scalar::utf16::swap_if_needed(data[pos]); - if ((word & 0xF800) == 0xD800) { - if (pos + 1 >= len) { - return false; - } - char16_t diff = char16_t(word - 0xD800); - if (diff > 0x3FF) { - return false; - } - char16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - char16_t diff2 = char16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return false; - } - pos += 2; - } else { - pos++; - } - } - return true; -} - -template -inline simdutf_warn_unused result validate_with_errors(const char16_t *data, - size_t len) noexcept { - size_t pos = 0; - while (pos < len) { - char16_t word = scalar::utf16::swap_if_needed(data[pos]); - if ((word & 0xF800) == 0xD800) { - if (pos + 1 >= len) { - return result(error_code::SURROGATE, pos); - } - char16_t diff = char16_t(word - 0xD800); - if (diff > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - char16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - char16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - pos += 2; - } else { - pos++; - } - } - return result(error_code::SUCCESS, pos); -} - -template -inline size_t count_code_points(const char16_t *p, size_t len) { - // We are not BOM aware. - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - char16_t word = scalar::utf16::swap_if_needed(p[i]); - counter += ((word & 0xFC00) != 0xDC00); - } - return counter; -} - -template -inline size_t utf8_length_from_utf16(const char16_t *p, size_t len) { - // We are not BOM aware. - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - char16_t word = scalar::utf16::swap_if_needed(p[i]); - counter++; // ASCII - counter += static_cast( - word > - 0x7F); // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes - counter += static_cast((word > 0x7FF && word <= 0xD7FF) || - (word >= 0xE000)); // three-byte - } - return counter; -} - -template -inline size_t utf32_length_from_utf16(const char16_t *p, size_t len) { - // We are not BOM aware. - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - char16_t word = scalar::utf16::swap_if_needed(p[i]); - counter += ((word & 0xFC00) != 0xDC00); - } - return counter; -} - -simdutf_really_inline void -change_endianness_utf16(const char16_t *input, size_t size, char16_t *output) { - for (size_t i = 0; i < size; i++) { - *output++ = char16_t(input[i] >> 8 | input[i] << 8); - } -} - -template -simdutf_warn_unused inline size_t trim_partial_utf16(const char16_t *input, - size_t length) { - if (length == 0) { - return 0; - } - uint16_t last_word = uint16_t(input[length - 1]); - last_word = scalar::utf16::swap_if_needed(last_word); - length -= ((last_word & 0xFC00) == 0xD800); - return length; -} - -template -simdutf_constexpr bool is_high_surrogate(char16_t c) { - c = scalar::utf16::swap_if_needed(c); - return (0xd800 <= c && c <= 0xdbff); -} - -template -simdutf_constexpr bool is_low_surrogate(char16_t c) { - c = scalar::utf16::swap_if_needed(c); - return (0xdc00 <= c && c <= 0xdfff); -} - -simdutf_really_inline constexpr bool high_surrogate(char16_t c) { - return (0xd800 <= c && c <= 0xdbff); -} - -simdutf_really_inline constexpr bool low_surrogate(char16_t c) { - return (0xdc00 <= c && c <= 0xdfff); -} - -template -inline result utf8_length_from_utf16_with_replacement(const char16_t *p, - size_t len) { - bool any_surrogates = false; - // We are not BOM aware. - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - if (is_high_surrogate(p[i])) { - any_surrogates = true; - // surrogate pair - if (i + 1 < len && is_low_surrogate(p[i + 1])) { - counter += 4; - i++; // skip low surrogate - } else { - counter += 3; // unpaired high surrogate replaced by U+FFFD - } - continue; - } else if (is_low_surrogate(p[i])) { - any_surrogates = true; - counter += 3; // unpaired low surrogate replaced by U+FFFD - continue; - } - char16_t word = !match_system(big_endian) ? u16_swap_bytes(p[i]) : p[i]; - counter++; // at least 1 byte - counter += - static_cast(word > 0x7F); // non-ASCII is at least 2 bytes - counter += static_cast(word > 0x7FF); // three-byte - } - return {any_surrogates ? error_code::SURROGATE : error_code::SUCCESS, - counter}; -} - -// variable templates are a C++14 extension -template constexpr char16_t replacement() { - return !match_system(big_endian) ? scalar::u16_swap_bytes(0xfffd) : 0xfffd; -} - -template -void to_well_formed_utf16(const char16_t *input, size_t len, char16_t *output) { - const char16_t replacement = utf16::replacement(); - bool high_surrogate_prev = false, high_surrogate, low_surrogate; - size_t i = 0; - for (; i < len; i++) { - char16_t c = input[i]; - high_surrogate = is_high_surrogate(c); - low_surrogate = is_low_surrogate(c); - if (high_surrogate_prev && !low_surrogate) { - output[i - 1] = replacement; - } - - if (!high_surrogate_prev && low_surrogate) { - output[i] = replacement; - } else { - output[i] = input[i]; - } - high_surrogate_prev = high_surrogate; - } - - /* string may not end with high surrogate */ - if (high_surrogate_prev) { - output[i - 1] = replacement; - } -} - -} // namespace utf16 -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING || - // (SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1) -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/scalar/utf32.h */ -#ifndef SIMDUTF_UTF32_H -#define SIMDUTF_UTF32_H - -namespace simdutf { -namespace scalar { -namespace utf32 { - -inline simdutf_warn_unused bool validate(const char32_t *buf, - size_t len) noexcept { - const uint32_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - for (; pos < len; pos++) { - uint32_t word = data[pos]; - if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { - return false; - } - } - return true; -} - -inline simdutf_warn_unused result validate_with_errors(const char32_t *buf, - size_t len) noexcept { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - for (; pos < len; pos++) { - uint32_t word = data[pos]; - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, pos); - } - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, pos); - } - } - return result(error_code::SUCCESS, pos); -} - -inline size_t utf8_length_from_utf32(const char32_t *buf, size_t len) { - // We are not BOM aware. - const uint32_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - // credit: @ttsugriy for the vectorizable approach - counter++; // ASCII - counter += static_cast(p[i] > 0x7F); // two-byte - counter += static_cast(p[i] > 0x7FF); // three-byte - counter += static_cast(p[i] > 0xFFFF); // four-bytes - } - return counter; -} - -inline size_t utf16_length_from_utf32(const char32_t *buf, size_t len) { - // We are not BOM aware. - const uint32_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - counter++; // non-surrogate word - counter += static_cast(p[i] > 0xFFFF); // surrogate pair - } - return counter; -} - -} // namespace utf32 -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32.h */ -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/latin1.h */ -#ifndef SIMDUTF_LATIN1_H -#define SIMDUTF_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace latin1 { - -simdutf_really_inline size_t utf8_length_from_latin1(const char *buf, - size_t len) { - const uint8_t *c = reinterpret_cast(buf); - size_t answer = 0; - for (size_t i = 0; i < len; i++) { - if ((c[i] >> 7)) { - answer++; - } - } - return answer + len; -} - -} // namespace latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/latin1.h */ -#endif // SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/scalar/base64.h */ -#ifndef SIMDUTF_BASE64_H -#define SIMDUTF_BASE64_H - -#include -#include -#include -#include -#include - -namespace simdutf { -namespace scalar { -namespace { -namespace base64 { - -// This function is not expected to be fast. Do not use in long loops. -// In most instances you should be using is_ignorable. -template bool is_ascii_white_space(char_type c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; -} - -template bool is_eight_byte(char_type c) { - if (sizeof(char_type) == 1) { - return true; - } - return uint8_t(c) == c; -} - -template -bool is_ignorable(char_type c, simdutf::base64_options options) { - const uint8_t *to_base64 = - (options & base64_default_or_url) - ? tables::base64::to_base64_default_or_url_value - : ((options & base64_url) ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value); - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage) || - (options == base64_options::base64_default_or_url_accept_garbage); - uint8_t code = to_base64[uint8_t(c)]; - if (is_eight_byte(c) && code <= 63) { - return false; - } - if (is_eight_byte(c) && code == 64) { - return true; - } - return ignore_garbage; -} -template -bool is_base64(char_type c, simdutf::base64_options options) { - const uint8_t *to_base64 = - (options & base64_default_or_url) - ? tables::base64::to_base64_default_or_url_value - : ((options & base64_url) ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value); - uint8_t code = to_base64[uint8_t(c)]; - if (is_eight_byte(c) && code <= 63) { - return true; - } - return false; -} - -template -bool is_base64_or_padding(char_type c, simdutf::base64_options options) { - const uint8_t *to_base64 = - (options & base64_default_or_url) - ? tables::base64::to_base64_default_or_url_value - : ((options & base64_url) ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value); - if (c == '=') { - return true; - } - uint8_t code = to_base64[uint8_t(c)]; - if (is_eight_byte(c) && code <= 63) { - return true; - } - return false; -} - -template -bool is_ignorable_or_padding(char_type c, simdutf::base64_options options) { - return is_ignorable(c, options) || c == '='; -} - -struct reduced_input { - size_t equalsigns; // number of padding characters '=', typically 0, 1, 2. - size_t equallocation; // location of the first padding character if any - size_t srclen; // length of the input buffer before padding - size_t full_input_length; // length of the input buffer with padding but - // without ignorable characters -}; - -// find the end of the base64 input buffer -// It returns the number of padding characters, the location of the first -// padding character if any, the length of the input buffer before padding -// and the length of the input buffer with padding. The input buffer is not -// modified. The function assumes that there are at most two padding characters. -template -reduced_input find_end(const char_type *src, size_t srclen, - simdutf::base64_options options) { - const uint8_t *to_base64 = - (options & base64_default_or_url) - ? tables::base64::to_base64_default_or_url_value - : ((options & base64_url) ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value); - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage) || - (options == base64_options::base64_default_or_url_accept_garbage); - - size_t equalsigns = 0; - // We intentionally include trailing spaces in the full input length. - // See https://github.com/simdutf/simdutf/issues/824 - size_t full_input_length = srclen; - // skip trailing spaces - while (!ignore_garbage && srclen > 0 && - scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - size_t equallocation = - srclen; // location of the first padding character if any - if (ignore_garbage) { - // Technically, we don't need to find the first padding character, we can - // just change our algorithms, but it adds substantial complexity. - auto it = simdutf::find(src, src + srclen, '='); - if (it != src + srclen) { - equallocation = it - src; - equalsigns = 1; - srclen = equallocation; - full_input_length = equallocation + 1; - } - return {equalsigns, equallocation, srclen, full_input_length}; - } - if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { - // This is the last '=' sign. - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - // This is the second '=' sign. - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - return {equalsigns, equallocation, srclen, full_input_length}; -} - -// Returns true upon success. The destination buffer must be large enough. -// This functions assumes that the padding (=) has been removed. -// if check_capacity is true, it will check that the destination buffer is -// large enough. If it is not, it will return OUTPUT_BUFFER_TOO_SMALL. -template -full_result base64_tail_decode_impl( - char *dst, size_t outlen, const char_type *src, size_t length, - size_t padding_characters, // number of padding characters - // '=', typically 0, 1, 2. - base64_options options, last_chunk_handling_options last_chunk_options) { - char *dstend = dst + outlen; - (void)dstend; - // This looks like 10 branches, but we expect the compiler to resolve this to - // two branches (easily predicted): - const uint8_t *to_base64 = - (options & base64_default_or_url) - ? tables::base64::to_base64_default_or_url_value - : ((options & base64_url) ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value); - const uint32_t *d0 = - (options & base64_default_or_url) - ? tables::base64::base64_default_or_url::d0 - : ((options & base64_url) ? tables::base64::base64_url::d0 - : tables::base64::base64_default::d0); - const uint32_t *d1 = - (options & base64_default_or_url) - ? tables::base64::base64_default_or_url::d1 - : ((options & base64_url) ? tables::base64::base64_url::d1 - : tables::base64::base64_default::d1); - const uint32_t *d2 = - (options & base64_default_or_url) - ? tables::base64::base64_default_or_url::d2 - : ((options & base64_url) ? tables::base64::base64_url::d2 - : tables::base64::base64_default::d2); - const uint32_t *d3 = - (options & base64_default_or_url) - ? tables::base64::base64_default_or_url::d3 - : ((options & base64_url) ? tables::base64::base64_url::d3 - : tables::base64::base64_default::d3); - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage) || - (options == base64_options::base64_default_or_url_accept_garbage); - - const char_type *srcend = src + length; - const char_type *srcinit = src; - const char *dstinit = dst; - - uint32_t x; - size_t idx; - uint8_t buffer[4]; - while (true) { - while (src + 4 <= srcend && is_eight_byte(src[0]) && - is_eight_byte(src[1]) && is_eight_byte(src[2]) && - is_eight_byte(src[3]) && - (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | - d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { - if simdutf_constexpr (match_system(endianness::BIG)) { - x = scalar::u32_swap_bytes(x); - } - if (check_capacity && dstend - dst < 3) { - return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes - dst += 3; - src += 4; - } - const char_type *srccur = src; - idx = 0; - // we need at least four characters. -#ifdef __clang__ - // If possible, we read four characters at a time. (It is an optimization.) - if (ignore_garbage && src + 4 <= srcend) { - char_type c0 = src[0]; - char_type c1 = src[1]; - char_type c2 = src[2]; - char_type c3 = src[3]; - - uint8_t code0 = to_base64[uint8_t(c0)]; - uint8_t code1 = to_base64[uint8_t(c1)]; - uint8_t code2 = to_base64[uint8_t(c2)]; - uint8_t code3 = to_base64[uint8_t(c3)]; - - buffer[idx] = code0; - idx += (is_eight_byte(c0) && code0 <= 63); - buffer[idx] = code1; - idx += (is_eight_byte(c1) && code1 <= 63); - buffer[idx] = code2; - idx += (is_eight_byte(c2) && code2 <= 63); - buffer[idx] = code3; - idx += (is_eight_byte(c3) && code3 <= 63); - src += 4; - } -#endif - while ((idx < 4) && (src < srcend)) { - char_type c = *src; - - uint8_t code = to_base64[uint8_t(c)]; - buffer[idx] = uint8_t(code); - if (is_eight_byte(c) && code <= 63) { - idx++; - } else if (!ignore_garbage && - (code > 64 || !scalar::base64::is_eight_byte(c))) { - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } else { - // We have a space or a newline or garbage. We ignore it. - } - src++; - } - if (idx != 4) { - simdutf_log_assert(idx < 4, "idx should be less than 4"); - // We never should have that the number of base64 characters + the - // number of padding characters is more than 4. - if (!ignore_garbage && (idx + padding_characters > 4)) { - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit), true}; - } - - // The idea here is that in loose mode, - // if there is padding at all, it must be used - // to form 4-wise chunk. However, in loose mode, - // we do accept no padding at all. - if (!ignore_garbage && - last_chunk_options == last_chunk_handling_options::loose && - (idx >= 2) && padding_characters > 0 && - ((idx + padding_characters) & 3) != 0) { - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit), true}; - } else - - // The idea here is that in strict mode, we do not want to accept - // incomplete base64 chunks. So if the chunk was otherwise valid, we - // return BASE64_INPUT_REMAINDER. - if (!ignore_garbage && - last_chunk_options == last_chunk_handling_options::strict && - (idx >= 2) && ((idx + padding_characters) & 3) != 0) { - // The partial chunk was at src - idx - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit), true}; - } else - // If there is a partial chunk with insufficient padding, with - // stop_before_partial, we need to just ignore it. In "only full" - // mode, skip the minute there are padding characters. - if ((last_chunk_options == - last_chunk_handling_options::stop_before_partial && - (padding_characters + idx < 4) && (idx != 0) && - (idx >= 2 || padding_characters == 0)) || - (last_chunk_options == - last_chunk_handling_options::only_full_chunks && - (idx >= 2 || padding_characters == 0))) { - // partial means that we are *not* going to consume the read - // characters. We need to rewind the src pointer. - src = srccur; - return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; - } else { - if (idx == 2) { - uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + - (uint32_t(buffer[1]) << 2 * 6); - if (!ignore_garbage && - (last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xffff)) { - return {BASE64_EXTRA_BITS, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (check_capacity && dstend - dst < 1) { - return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), - size_t(dst - dstinit)}; - } - if simdutf_constexpr (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 1); - } else { - triple = scalar::u32_swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 1); - } - dst += 1; - } else if (idx == 3) { - uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + - (uint32_t(buffer[1]) << 2 * 6) + - (uint32_t(buffer[2]) << 1 * 6); - if (!ignore_garbage && - (last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xff)) { - return {BASE64_EXTRA_BITS, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (check_capacity && dstend - dst < 2) { - return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), - size_t(dst - dstinit)}; - } - if simdutf_constexpr (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 2); - } else { - triple = scalar::u32_swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 2); - } - dst += 2; - } else if (!ignore_garbage && idx == 1 && - (!is_partial(last_chunk_options) || - (is_partial(last_chunk_options) && - padding_characters > 0))) { - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } else if (!ignore_garbage && idx == 0 && padding_characters > 0) { - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit), true}; - } - return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; - } - } - if (check_capacity && dstend - dst < 3) { - return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), - size_t(dst - dstinit)}; - } - uint32_t triple = - (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) + - (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) << 0 * 6); - if simdutf_constexpr (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 3); - } else { - triple = scalar::u32_swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 3); - } - dst += 3; - } -} - -template -full_result -base64_tail_decode(char *dst, const char_type *src, size_t length, - size_t padding_characters, // number of padding characters - // '=', typically 0, 1, 2. - base64_options options, - last_chunk_handling_options last_chunk_options) { - return base64_tail_decode_impl(dst, 0, src, length, padding_characters, - options, last_chunk_options); -} - -// like base64_tail_decode, but it will not write past the end of the output -// buffer. The outlen parameter is modified to reflect the number of bytes -// written. This functions assumes that the padding (=) has been removed. -// -template -full_result base64_tail_decode_safe( - char *dst, size_t outlen, const char_type *src, size_t length, - size_t padding_characters, // number of padding characters - // '=', typically 0, 1, 2. - base64_options options, last_chunk_handling_options last_chunk_options) { - return base64_tail_decode_impl(dst, outlen, src, length, - padding_characters, options, - last_chunk_options); -} - -inline full_result -patch_tail_result(full_result r, size_t previous_input, size_t previous_output, - size_t equallocation, size_t full_input_length, - last_chunk_handling_options last_chunk_options) { - r.input_count += previous_input; - r.output_count += previous_output; - if (r.padding_error) { - r.input_count = equallocation; - } - - if (r.error == error_code::SUCCESS) { - if (!is_partial(last_chunk_options)) { - // A success when we are not in stop_before_partial mode. - // means that we have consumed the whole input buffer. - r.input_count = full_input_length; - } else if (r.output_count % 3 != 0) { - r.input_count = full_input_length; - } - } - return r; -} - -// Returns the number of bytes written. The destination buffer must be large -// enough. It will add padding (=) if needed. -template -size_t tail_encode_base64_impl( - char *dst, const char *src, size_t srclen, base64_options options, - size_t line_length = simdutf::default_line_length, size_t line_offset = 0) { - if (use_lines) { - // sanitize line_length and starting_line_offset. - // line_length must be greater than 3. - if (line_length < 4) { - line_length = 4; - } - simdutf_log_assert(line_offset <= line_length, - "line_offset should be less than line_length"); - } - // By default, we use padding if we are not using the URL variant. - // This is check with ((options & base64_url) == 0) which returns true if we - // are not using the URL variant. However, we also allow 'inversion' of the - // convention with the base64_reverse_padding option. If the - // base64_reverse_padding option is set, we use padding if we are using the - // URL variant, and we omit it if we are not using the URL variant. This is - // checked with - // ((options & base64_reverse_padding) == base64_reverse_padding). - bool use_padding = - ((options & base64_url) == 0) ^ - ((options & base64_reverse_padding) == base64_reverse_padding); - // This looks like 3 branches, but we expect the compiler to resolve this to - // a single branch: - const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 - : tables::base64::base64_default::e0; - const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 - : tables::base64::base64_default::e1; - const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2 - : tables::base64::base64_default::e2; - char *out = dst; - size_t i = 0; - uint8_t t1, t2, t3; - for (; i + 2 < srclen; i += 3) { - t1 = uint8_t(src[i]); - t2 = uint8_t(src[i + 1]); - t3 = uint8_t(src[i + 2]); - if (use_lines) { - if (line_offset + 3 >= line_length) { - if (line_offset == line_length) { - *out++ = '\n'; - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; - *out++ = e2[t3]; - line_offset = 4; - } else if (line_offset + 1 == line_length) { - *out++ = e0[t1]; - *out++ = '\n'; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; - *out++ = e2[t3]; - line_offset = 3; - } else if (line_offset + 2 == line_length) { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = '\n'; - *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; - *out++ = e2[t3]; - line_offset = 2; - } else if (line_offset + 3 == line_length) { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; - *out++ = '\n'; - *out++ = e2[t3]; - line_offset = 1; - } - } else { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; - *out++ = e2[t3]; - line_offset += 4; - } - } else { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; - *out++ = e2[t3]; - } - } - switch (srclen - i) { - case 0: - break; - case 1: - t1 = uint8_t(src[i]); - if (use_lines) { - if (use_padding) { - if (line_offset + 3 >= line_length) { - if (line_offset == line_length) { - *out++ = '\n'; - *out++ = e0[t1]; - *out++ = e1[(t1 & 0x03) << 4]; - *out++ = '='; - *out++ = '='; - } else if (line_offset + 1 == line_length) { - *out++ = e0[t1]; - *out++ = '\n'; - *out++ = e1[(t1 & 0x03) << 4]; - *out++ = '='; - *out++ = '='; - } else if (line_offset + 2 == line_length) { - *out++ = e0[t1]; - *out++ = e1[(t1 & 0x03) << 4]; - *out++ = '\n'; - *out++ = '='; - *out++ = '='; - } else if (line_offset + 3 == line_length) { - *out++ = e0[t1]; - *out++ = e1[(t1 & 0x03) << 4]; - *out++ = '='; - *out++ = '\n'; - *out++ = '='; - } - } else { - *out++ = e0[t1]; - *out++ = e1[(t1 & 0x03) << 4]; - *out++ = '='; - *out++ = '='; - } - } else { - if (line_offset + 2 >= line_length) { - if (line_offset == line_length) { - *out++ = '\n'; - *out++ = e0[uint8_t(src[i])]; - *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; - } else if (line_offset + 1 == line_length) { - *out++ = e0[uint8_t(src[i])]; - *out++ = '\n'; - *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; - } else { - *out++ = e0[uint8_t(src[i])]; - *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; - // *out++ = '\n'; ==> no newline at the end of the output - } - } else { - *out++ = e0[uint8_t(src[i])]; - *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; - } - } - } else { - *out++ = e0[t1]; - *out++ = e1[(t1 & 0x03) << 4]; - if (use_padding) { - *out++ = '='; - *out++ = '='; - } - } - break; - default: /* case 2 */ - t1 = uint8_t(src[i]); - t2 = uint8_t(src[i + 1]); - if (use_lines) { - if (use_padding) { - if (line_offset + 3 >= line_length) { - if (line_offset == line_length) { - *out++ = '\n'; - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - *out++ = '='; - } else if (line_offset + 1 == line_length) { - *out++ = e0[t1]; - *out++ = '\n'; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - *out++ = '='; - } else if (line_offset + 2 == line_length) { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = '\n'; - *out++ = e2[(t2 & 0x0F) << 2]; - *out++ = '='; - } else if (line_offset + 3 == line_length) { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - *out++ = '\n'; - *out++ = '='; - } - } else { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - *out++ = '='; - } - } else { - if (line_offset + 3 >= line_length) { - if (line_offset == line_length) { - *out++ = '\n'; - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - } else if (line_offset + 1 == line_length) { - *out++ = e0[t1]; - *out++ = '\n'; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - } else if (line_offset + 2 == line_length) { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = '\n'; - *out++ = e2[(t2 & 0x0F) << 2]; - } else { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - // *out++ = '\n'; ==> no newline at the end of the output - } - } else { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - } - } - } else { - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - if (use_padding) { - *out++ = '='; - } - } - } - return (size_t)(out - dst); -} - -// Returns the number of bytes written. The destination buffer must be large -// enough. It will add padding (=) if needed. -inline size_t tail_encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - return tail_encode_base64_impl(dst, src, srclen, options); -} - -template -simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char_type *input, size_t length) noexcept { - // We process the padding characters ('=') at the end to make sure - // that we return an exact result when the input has no ignorable characters - // (e.g., spaces). - size_t padding = 0; - if (length > 0) { - if (input[length - 1] == '=') { - padding++; - if (length > 1 && input[length - 2] == '=') { - padding++; - } - } - } - // The input is not otherwise processed for ignorable characters or - // validation, so that the function runs in constant time (very fast). In - // practice, base64 inputs without ignorable characters are common and the - // common case are line separated inputs with relatively long lines (e.g., 76 - // characters) which leads this function to a slight (1%) overestimation of - // the output size. - // - // Of course, some inputs might contain an arbitrary number of spaces or - // newlines, which would make this function return a very pessimistic output - // size but systems that produce base64 outputs typically do not do that and - // if they do, they do not care much about minimizing memory usage. - // - // In specialized applications, users may know that their input is line - // separated, which can be checked very quickly by by iterating (e.g., over 76 - // character chunks, looking for the linefeed characters only). We could - // provide a specialized function for that, but it is not clear that the added - // complexity is worth it for us. - // - size_t actual_length = length - padding; - if (actual_length % 4 <= 1) { - return actual_length / 4 * 3; - } - // if we have a valid input, then the remainder must be 2 or 3 adding one or - // two extra bytes. - return actual_length / 4 * 3 + (actual_length % 4) - 1; -} - -template -simdutf_warn_unused full_result base64_to_binary_details_impl( - const char_type *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage) || - (options == base64_options::base64_default_or_url_accept_garbage); - auto ri = simdutf::scalar::base64::find_end(input, length, options); - size_t equallocation = ri.equallocation; - size_t equalsigns = ri.equalsigns; - length = ri.srclen; - size_t full_input_length = ri.full_input_length; - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, full_input_length, 0}; - } - full_result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, - full_input_length, last_chunk_options); - if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && - equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; - } - } - // When is_partial(last_chunk_options) is true, we must either end with - // the end of the stream (beyond whitespace) or right after a non-ignorable - // character or at the very beginning of the stream. - // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 - if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && - r.input_count < full_input_length) { - // First check if we can extend the input to the end of the stream - while (r.input_count < full_input_length && - base64_ignorable(*(input + r.input_count), options)) { - r.input_count++; - } - // If we are still not at the end of the stream, then we must backtrack - // to the last non-ignorable character. - if (r.input_count < full_input_length) { - while (r.input_count > 0 && - base64_ignorable(*(input + r.input_count - 1), options)) { - r.input_count--; - } - } - } - return r; -} - -template -simdutf_warn_unused full_result base64_to_binary_details_safe_impl( - const char_type *input, size_t length, char *output, size_t outlen, - base64_options options, - last_chunk_handling_options last_chunk_options) noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage) || - (options == base64_options::base64_default_or_url_accept_garbage); - auto ri = simdutf::scalar::base64::find_end(input, length, options); - size_t equallocation = ri.equallocation; - size_t equalsigns = ri.equalsigns; - length = ri.srclen; - size_t full_input_length = ri.full_input_length; - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, full_input_length, 0}; - } - full_result r = scalar::base64::base64_tail_decode_safe( - output, outlen, input, length, equalsigns, options, last_chunk_options); - r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, - full_input_length, last_chunk_options); - if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && - equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; - } - } - - // When is_partial(last_chunk_options) is true, we must either end with - // the end of the stream (beyond whitespace) or right after a non-ignorable - // character or at the very beginning of the stream. - // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 - if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && - r.input_count < full_input_length) { - // First check if we can extend the input to the end of the stream - while (r.input_count < full_input_length && - base64_ignorable(*(input + r.input_count), options)) { - r.input_count++; - } - // If we are still not at the end of the stream, then we must backtrack - // to the last non-ignorable character. - if (r.input_count < full_input_length) { - while (r.input_count > 0 && - base64_ignorable(*(input + r.input_count - 1), options)) { - r.input_count--; - } - } - } - return r; -} - -simdutf_warn_unused size_t -base64_length_from_binary(size_t length, base64_options options) noexcept { - // By default, we use padding if we are not using the URL variant. - // This is check with ((options & base64_url) == 0) which returns true if we - // are not using the URL variant. However, we also allow 'inversion' of the - // convention with the base64_reverse_padding option. If the - // base64_reverse_padding option is set, we use padding if we are using the - // URL variant, and we omit it if we are not using the URL variant. This is - // checked with - // ((options & base64_reverse_padding) == base64_reverse_padding). - bool use_padding = - ((options & base64_url) == 0) ^ - ((options & base64_reverse_padding) == base64_reverse_padding); - if (!use_padding) { - return length / 3 * 4 + ((length % 3) ? (length % 3) + 1 : 0); - } - return (length + 2) / 3 * - 4; // We use padding to make the length a multiple of 4. -} - -simdutf_warn_unused size_t base64_length_from_binary_with_lines( - size_t length, base64_options options, size_t line_length) noexcept { - if (length == 0) { - return 0; - } - size_t base64_length = - scalar::base64::base64_length_from_binary(length, options); - if (line_length < 4) { - line_length = 4; - } - size_t lines = - (base64_length + line_length - 1) / line_length; // number of lines - return base64_length + lines - 1; -} - -// Return the length of the prefix that contains count base64 characters. -// Thus, if count is 3, the function returns the length of the prefix -// that contains 3 base64 characters. -// The function returns (size_t)-1 if there is not enough base64 characters in -// the input. -template -simdutf_warn_unused size_t prefix_length(size_t count, - simdutf::base64_options options, - const char_type *input, - size_t length) noexcept { - size_t i = 0; - while (i < length && is_ignorable(input[i], options)) { - i++; - } - if (count == 0) { - return i; // duh! - } - for (; i < length; i++) { - if (is_ignorable(input[i], options)) { - continue; - } - // We have a base64 character or a padding character. - count--; - if (count == 0) { - return i + 1; - } - } - simdutf_log_assert(false, "You never get here"); - - return -1; // should never happen -} - -} // namespace base64 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/base64.h */ -#endif // SIMDUTF_FEATURE_BASE64 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ -#ifndef SIMDUTF_VALID_UTF32_TO_UTF8_H -#define SIMDUTF_VALID_UTF32_TO_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf8 { - -#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_PPC64 -// only used by the fallback and POWER kernel -inline size_t convert_valid(const char32_t *buf, size_t len, - char *utf8_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 2 ASCII characters - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF80FFFFFF80) == 0) { - *utf8_output++ = char(buf[pos]); - *utf8_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t word = data[pos]; - if ((word & 0xFFFFFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xFFFFF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xFFFF0000) == 0) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } - } - return utf8_output - start; -} -#endif // SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_PPC64 - -} // namespace utf32_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ -/* begin file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ -#ifndef SIMDUTF_UTF32_TO_UTF8_H -#define SIMDUTF_UTF32_TO_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf8 { - -inline size_t convert(const char32_t *buf, size_t len, char *utf8_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 2 ASCII characters - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF80FFFFFF80) == 0) { - *utf8_output++ = char(buf[pos]); - *utf8_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t word = data[pos]; - if ((word & 0xFFFFFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xFFFFF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xFFFF0000) == 0) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - if (word >= 0xD800 && word <= 0xDFFF) { - return 0; - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - if (word > 0x10FFFF) { - return 0; - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } - } - return utf8_output - start; -} - -inline result convert_with_errors(const char32_t *buf, size_t len, - char *utf8_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 2 ASCII characters - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF80FFFFFF80) == 0) { - *utf8_output++ = char(buf[pos]); - *utf8_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t word = data[pos]; - if ((word & 0xFFFFFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xFFFFF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xFFFF0000) == 0) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, pos); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, pos); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } - } - return result(error_code::SUCCESS, utf8_output - start); -} - -} // namespace utf32_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ -#ifndef SIMDUTF_VALID_UTF32_TO_UTF16_H -#define SIMDUTF_VALID_UTF32_TO_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf16 { - -template -inline size_t convert_valid(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - uint32_t word = data[pos]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(uint16_t(word))) - : char16_t(word); - pos++; - } else { - // will generate a surrogate pair - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if simdutf_constexpr (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos++; - } - } - return utf16_output - start; -} - -} // namespace utf32_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ -/* begin file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ -#ifndef SIMDUTF_UTF32_TO_UTF16_H -#define SIMDUTF_UTF32_TO_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf16 { - -template -inline size_t convert(const char32_t *buf, size_t len, char16_t *utf16_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - uint32_t word = data[pos]; - if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return 0; - } - // will not generate a surrogate pair - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(uint16_t(word))) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return 0; - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if simdutf_constexpr (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - pos++; - } - return utf16_output - start; -} - -template -inline result convert_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - uint32_t word = data[pos]; - if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, pos); - } - // will not generate a surrogate pair - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(uint16_t(word))) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, pos); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if simdutf_constexpr (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - pos++; - } - return result(error_code::SUCCESS, utf16_output - start); -} - -} // namespace utf32_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ -#ifndef SIMDUTF_VALID_UTF16_TO_UTF8_H -#define SIMDUTF_VALID_UTF16_TO_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf8 { - -template -inline size_t convert_valid(const char16_t *buf, size_t len, - char *utf8_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 4 ASCII characters - if (pos + 4 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if simdutf_constexpr (!match_system(big_endian)) { - v = (v >> 8) | (v << (64 - 8)); - } - if ((v & 0xFF80FF80FF80FF80) == 0) { - size_t final_pos = pos + 4; - while (pos < final_pos) { - *utf8_output++ = !match_system(big_endian) - ? char(u16_swap_bytes(buf[pos])) - : char(buf[pos]); - pos++; - } - continue; - } - } - - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xF800) != 0xD800) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - uint32_t value = (diff << 10) + diff2 + 0x10000; - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - pos += 2; - } - } - return utf8_output - start; -} - -} // namespace utf16_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ -/* begin file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ -#ifndef SIMDUTF_UTF16_TO_UTF8_H -#define SIMDUTF_UTF16_TO_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf8 { - -template -inline size_t convert(const char16_t *buf, size_t len, char *utf8_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 8 bytes - if (pos + 4 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if simdutf_constexpr (!match_system(big_endian)) { - v = (v >> 8) | (v << (64 - 8)); - } - if ((v & 0xFF80FF80FF80FF80) == 0) { - size_t final_pos = pos + 4; - while (pos < final_pos) { - *utf8_output++ = !match_system(big_endian) - ? char(u16_swap_bytes(buf[pos])) - : char(buf[pos]); - pos++; - } - continue; - } - } - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xF800) != 0xD800) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // must be a surrogate pair - if (pos + 1 >= len) { - return 0; - } - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return 0; - } - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return 0; - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - pos += 2; - } - } - return utf8_output - start; -} - -template -inline full_result convert_with_errors(const char16_t *buf, size_t len, - char *utf8_output, size_t utf8_len = 0) { - const uint16_t *data = reinterpret_cast(buf); - if (check_output && utf8_len == 0) { - return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, 0, 0); - } - - size_t pos = 0; - char *start{utf8_output}; - char *end{utf8_output + utf8_len}; - - while (pos < len) { - // try to convert the next block of 8 bytes - if (pos + 4 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if simdutf_constexpr (!match_system(big_endian)) - v = (v >> 8) | (v << (64 - 8)); - if ((v & 0xFF80FF80FF80FF80) == 0) { - size_t final_pos = pos + 4; - while (pos < final_pos) { - *utf8_output++ = !match_system(big_endian) - ? char(u16_swap_bytes(buf[pos])) - : char(buf[pos]); - pos++; - if (check_output && size_t(end - utf8_output) == 0) { - return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, - utf8_output - start); - } - } - continue; - } - } - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - if (check_output && size_t(end - utf8_output) == 0) { - return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, - utf8_output - start); - } - } else if ((word & 0xF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - if (check_output && size_t(end - utf8_output) < 2) { - return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, - utf8_output - start); - } - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - - } else if ((word & 0xF800) != 0xD800) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - if (check_output && size_t(end - utf8_output) < 3) { - return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, - utf8_output - start); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - - if (check_output && size_t(end - utf8_output) < 4) { - return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, - utf8_output - start); - } - // must be a surrogate pair - if (pos + 1 >= len) { - return full_result(error_code::SURROGATE, pos, utf8_output - start); - } - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return full_result(error_code::SURROGATE, pos, utf8_output - start); - } - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return full_result(error_code::SURROGATE, pos, utf8_output - start); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - pos += 2; - } - } - return full_result(error_code::SUCCESS, pos, utf8_output - start); -} - -template -inline result simple_convert_with_errors(const char16_t *buf, size_t len, - char *utf8_output) { - return convert_with_errors(buf, len, utf8_output, 0); -} - -} // namespace utf16_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ -#ifndef SIMDUTF_VALID_UTF16_TO_UTF32_H -#define SIMDUTF_VALID_UTF16_TO_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf32 { - -template -inline size_t convert_valid(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair, extend 16-bit word to 32-bit word - *utf32_output++ = char32_t(word); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - pos += 2; - } - } - return utf32_output - start; -} - -} // namespace utf16_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ -/* begin file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ -#ifndef SIMDUTF_UTF16_TO_UTF32_H -#define SIMDUTF_UTF16_TO_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf32 { - -template -inline size_t convert(const char16_t *buf, size_t len, char32_t *utf32_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair, extend 16-bit word to 32-bit word - *utf32_output++ = char32_t(word); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return 0; - } - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return 0; - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - pos += 2; - } - } - return utf32_output - start; -} - -template -inline result convert_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair, extend 16-bit word to 32-bit word - *utf32_output++ = char32_t(word); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - if (pos + 1 >= len) { - return result(error_code::SURROGATE, pos); - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - pos += 2; - } - } - return result(error_code::SUCCESS, utf32_output - start); -} - -} // namespace utf16_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && \ - (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) -/* begin file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ -#ifndef SIMDUTF_VALID_UTF8_TO_UTF16_H -#define SIMDUTF_VALID_UTF8_TO_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf16 { - -template -inline size_t convert_valid(const char *buf, size_t len, - char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - // try to convert the next block of 8 ASCII bytes - if (pos + 8 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 8; - while (pos < final_pos) { - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(buf[pos])) - : char16_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(leading_byte)) - : char16_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 1 >= len) { - break; - } // minimal bound checking - uint16_t code_point = uint16_t(((leading_byte & 0b00011111) << 6) | - (data[pos + 1] & 0b00111111)); - if simdutf_constexpr (!match_system(big_endian)) { - code_point = u16_swap_bytes(uint16_t(code_point)); - } - *utf16_output++ = char16_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 2 >= len) { - break; - } // minimal bound checking - uint16_t code_point = uint16_t(((leading_byte & 0b00001111) << 12) | - ((data[pos + 1] & 0b00111111) << 6) | - (data[pos + 2] & 0b00111111)); - if simdutf_constexpr (!match_system(big_endian)) { - code_point = u16_swap_bytes(uint16_t(code_point)); - } - *utf16_output++ = char16_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - break; - } // minimal bound checking - uint32_t code_point = ((leading_byte & 0b00000111) << 18) | - ((data[pos + 1] & 0b00111111) << 12) | - ((data[pos + 2] & 0b00111111) << 6) | - (data[pos + 3] & 0b00111111); - code_point -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); - if simdutf_constexpr (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos += 4; - } else { - // we may have a continuation but we do not do error checking - return 0; - } - } - return utf16_output - start; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ -/* begin file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ -#ifndef SIMDUTF_UTF8_TO_UTF16_H -#define SIMDUTF_UTF8_TO_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf16 { - -template -inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(buf[pos])) - : char16_t(buf[pos]); - pos++; - } - continue; - } - } - - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(leading_byte)) - : char16_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return 0; - } - if simdutf_constexpr (!match_system(big_endian)) { - code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 2 >= len) { - return 0; - } // minimal bound checking - - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if (code_point < 0x800 || 0xffff < code_point || - (0xd7ff < code_point && code_point < 0xe000)) { - return 0; - } - if simdutf_constexpr (!match_system(big_endian)) { - code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return 0; - } - - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { - return 0; - } - code_point -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); - if simdutf_constexpr (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos += 4; - } else { - return 0; - } - } - return utf16_output - start; -} - -template -inline result convert_with_errors(const char *buf, size_t len, - char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(buf[pos])) - : char16_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(leading_byte)) - : char16_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 1 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return result(error_code::OVERLONG, pos); - } - if simdutf_constexpr (!match_system(big_endian)) { - code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 2 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if ((code_point < 0x800) || (0xffff < code_point)) { - return result(error_code::OVERLONG, pos); - } - if (0xd7ff < code_point && code_point < 0xe000) { - return result(error_code::SURROGATE, pos); - } - if simdutf_constexpr (!match_system(big_endian)) { - code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff) { - return result(error_code::OVERLONG, pos); - } - if (0x10ffff < code_point) { - return result(error_code::TOO_LARGE, pos); - } - code_point -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); - if simdutf_constexpr (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos += 4; - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((leading_byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } else { - return result(error_code::HEADER_BITS, pos); - } - } - } - return result(error_code::SUCCESS, utf16_output - start); -} - -/** - * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and - * we have up to len input bytes left, and we encountered some error. It is - * possible that the error is at 'buf' exactly, but it could also be in the - * previous bytes (up to 3 bytes back). - * - * prior_bytes indicates how many bytes, prior to 'buf' may belong to the - * current memory section and can be safely accessed. We prior_bytes to access - * safely up to three bytes before 'buf'. - * - * The caller is responsible to ensure that len > 0. - * - * If the error is believed to have occurred prior to 'buf', the count value - * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. - */ -template -inline result rewind_and_convert_with_errors(size_t prior_bytes, - const char *buf, size_t len, - char16_t *utf16_output) { - size_t extra_len{0}; - // We potentially need to go back in time and find a leading byte. - // In theory '3' would be sufficient, but sometimes the error can go back - // quite far. - size_t how_far_back = prior_bytes; - // size_t how_far_back = 3; // 3 bytes in the past + current position - // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } - bool found_leading_bytes{false}; - // important: it is i <= how_far_back and not 'i < how_far_back'. - for (size_t i = 0; i <= how_far_back; i++) { - unsigned char byte = buf[-static_cast(i)]; - found_leading_bytes = ((byte & 0b11000000) != 0b10000000); - if (found_leading_bytes) { - if (i > 0 && byte < 128) { - // If we had to go back and the leading byte is ascii - // then we can stop right away. - return result(error_code::TOO_LONG, 0 - i + 1); - } - buf -= i; - extra_len = i; - break; - } - } - // - // It is possible for this function to return a negative count in its result. - // C++ Standard Section 18.1 defines size_t is in which is described - // in C Standard as . C Standard Section 4.1.5 defines size_t as an - // unsigned integral type of the result of the sizeof operator - // - // An unsigned type will simply wrap round arithmetically (well defined). - // - if (!found_leading_bytes) { - // If how_far_back == 3, we may have four consecutive continuation bytes!!! - // [....] [continuation] [continuation] [continuation] | [buf is - // continuation] Or we possibly have a stream that does not start with a - // leading byte. - return result(error_code::TOO_LONG, 0 - how_far_back); - } - result res = convert_with_errors(buf, len + extra_len, utf16_output); - if (res.error) { - res.count -= extra_len; - } - return res; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || - // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 -/* begin file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ -#ifndef SIMDUTF_VALID_UTF8_TO_UTF32_H -#define SIMDUTF_VALID_UTF8_TO_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf32 { - -inline size_t convert_valid(const char *buf, size_t len, - char32_t *utf32_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - // try to convert the next block of 8 ASCII bytes - if (pos + 8 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 8; - while (pos < final_pos) { - *utf32_output++ = char32_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf32_output++ = char32_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - break; - } // minimal bound checking - *utf32_output++ = char32_t(((leading_byte & 0b00011111) << 6) | - (data[pos + 1] & 0b00111111)); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - if (pos + 2 >= len) { - break; - } // minimal bound checking - *utf32_output++ = char32_t(((leading_byte & 0b00001111) << 12) | - ((data[pos + 1] & 0b00111111) << 6) | - (data[pos + 2] & 0b00111111)); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - break; - } // minimal bound checking - uint32_t code_word = ((leading_byte & 0b00000111) << 18) | - ((data[pos + 1] & 0b00111111) << 12) | - ((data[pos + 2] & 0b00111111) << 6) | - (data[pos + 3] & 0b00111111); - *utf32_output++ = char32_t(code_word); - pos += 4; - } else { - // we may have a continuation but we do not do error checking - return 0; - } - } - return utf32_output - start; -} - -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ -/* begin file src/scalar/utf8_to_utf32/utf8_to_utf32.h */ -#ifndef SIMDUTF_UTF8_TO_UTF32_H -#define SIMDUTF_UTF8_TO_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf32 { - -inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf32_output++ = char32_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf32_output++ = char32_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return 0; - } - *utf32_output++ = char32_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - if (pos + 2 >= len) { - return 0; - } // minimal bound checking - - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if (code_point < 0x800 || 0xffff < code_point || - (0xd7ff < code_point && code_point < 0xe000)) { - return 0; - } - *utf32_output++ = char32_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return 0; - } - - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { - return 0; - } - *utf32_output++ = char32_t(code_point); - pos += 4; - } else { - return 0; - } - } - return utf32_output - start; -} - -inline result convert_with_errors(const char *buf, size_t len, - char32_t *utf32_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf32_output++ = char32_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf32_output++ = char32_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return result(error_code::OVERLONG, pos); - } - *utf32_output++ = char32_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - if (pos + 2 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if (code_point < 0x800 || 0xffff < code_point) { - return result(error_code::OVERLONG, pos); - } - if (0xd7ff < code_point && code_point < 0xe000) { - return result(error_code::SURROGATE, pos); - } - *utf32_output++ = char32_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff) { - return result(error_code::OVERLONG, pos); - } - if (0x10ffff < code_point) { - return result(error_code::TOO_LARGE, pos); - } - *utf32_output++ = char32_t(code_point); - pos += 4; - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((leading_byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } else { - return result(error_code::HEADER_BITS, pos); - } - } - } - return result(error_code::SUCCESS, utf32_output - start); -} - -/** - * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and - * we have up to len input bytes left, and we encountered some error. It is - * possible that the error is at 'buf' exactly, but it could also be in the - * previous bytes location (up to 3 bytes back). - * - * prior_bytes indicates how many bytes, prior to 'buf' may belong to the - * current memory section and can be safely accessed. We prior_bytes to access - * safely up to three bytes before 'buf'. - * - * The caller is responsible to ensure that len > 0. - * - * If the error is believed to have occurred prior to 'buf', the count value - * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. - */ -inline result rewind_and_convert_with_errors(size_t prior_bytes, - const char *buf, size_t len, - char32_t *utf32_output) { - size_t extra_len{0}; - // We potentially need to go back in time and find a leading byte. - size_t how_far_back = 3; // 3 bytes in the past + current position - if (how_far_back > prior_bytes) { - how_far_back = prior_bytes; - } - bool found_leading_bytes{false}; - // important: it is i <= how_far_back and not 'i < how_far_back'. - for (size_t i = 0; i <= how_far_back; i++) { - unsigned char byte = buf[-static_cast(i)]; - found_leading_bytes = ((byte & 0b11000000) != 0b10000000); - if (found_leading_bytes) { - if (i > 0 && byte < 128) { - // If we had to go back and the leading byte is ascii - // then we can stop right away. - return result(error_code::TOO_LONG, 0 - i + 1); - } - buf -= i; - extra_len = i; - break; - } - } - // - // It is possible for this function to return a negative count in its result. - // C++ Standard Section 18.1 defines size_t is in which is described - // in C Standard as . C Standard Section 4.1.5 defines size_t as an - // unsigned integral type of the result of the sizeof operator - // - // An unsigned type will simply wrap round arithmetically (well defined). - // - if (!found_leading_bytes) { - // If how_far_back == 3, we may have four consecutive continuation bytes!!! - // [....] [continuation] [continuation] [continuation] | [buf is - // continuation] Or we possibly have a stream that does not start with a - // leading byte. - return result(error_code::TOO_LONG, 0 - how_far_back); - } - - result res = convert_with_errors(buf, len + extra_len, utf32_output); - if (res.error) { - res.count -= extra_len; - } - return res; -} + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t length) const noexcept override; + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 -#endif -/* end file src/scalar/utf8_to_utf32/utf8_to_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ -#ifndef SIMDUTF_LATIN1_TO_UTF8_H -#define SIMDUTF_LATIN1_TO_UTF8_H +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -namespace simdutf { -namespace scalar { -namespace { -namespace latin1_to_utf8 { +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -inline size_t convert(const char *buf, size_t len, char *utf8_output) { - const unsigned char *data = reinterpret_cast(buf); - size_t pos = 0; - size_t utf8_pos = 0; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - utf8_output[utf8_pos++] = char(buf[pos]); - pos++; - } - continue; - } - } +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf16_length_from_utf8( + const char *input, size_t length) const noexcept override; + simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept override; + ; + simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept override; + ; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - unsigned char byte = data[pos]; - if ((byte & 0x80) == 0) { // if ASCII - // will generate one UTF-8 bytes - utf8_output[utf8_pos++] = char(byte); - pos++; - } else { - // will generate two UTF-8 bytes - utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); - utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); - pos++; - } - } - return utf8_pos; -} +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -inline size_t convert_safe(const char *buf, size_t len, char *utf8_output, - size_t utf8_len) { - const unsigned char *data = reinterpret_cast(buf); - size_t pos = 0; - size_t skip_pos = 0; - size_t utf8_pos = 0; - while (pos < len && utf8_pos < utf8_len) { - // try to convert the next block of 16 ASCII bytes - if (pos >= skip_pos && pos + 16 <= len && - utf8_pos + 16 <= utf8_len) { // if it is safe to read 16 more bytes, - // check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - ::memcpy(utf8_output + utf8_pos, buf + pos, 16); - utf8_pos += 16; - pos += 16; - } else { - // At least one of the next 16 bytes are not ASCII, we will process them - // one by one - skip_pos = pos + 16; - } - } else { - const auto byte = data[pos]; - if ((byte & 0x80) == 0) { // if ASCII - // will generate one UTF-8 bytes - utf8_output[utf8_pos++] = char(byte); - pos++; - } else if (utf8_pos + 2 <= utf8_len) { - // will generate two UTF-8 bytes - utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); - utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); - pos++; - } else { - break; - } - } - } - return utf8_pos; -} +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -} // namespace latin1_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf8( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#endif -/* end file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t latin1_length_from_utf8( + const char *input, size_t length) const noexcept override; #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/latin1_to_utf16/latin1_to_utf16.h */ -#ifndef SIMDUTF_LATIN1_TO_UTF16_H -#define SIMDUTF_LATIN1_TO_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace latin1_to_utf16 { -template -inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - - while (pos < len) { - uint16_t word = - uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point - *utf16_output++ = - char16_t(match_system(big_endian) ? word : u16_swap_bytes(word)); - pos++; - } +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t utf8_length_from_latin1( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - return utf16_output - start; -} +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept override; + size_t + binary_to_base64_with_lines(const char *input, size_t length, char *output, + size_t line_length, + base64_options options) const noexcept override; + const char *find(const char *start, const char *end, + char character) const noexcept override; + const char16_t *find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept override; -template -inline result convert_with_errors(const char *buf, size_t len, - char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; +#endif // SIMDUTF_FEATURE_BASE64 +}; +} // namespace fallback +} // namespace simdutf - while (pos < len) { - uint16_t word = - uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point - *utf16_output++ = - char16_t(match_system(big_endian) ? word : u16_swap_bytes(word)); - pos++; - } +#endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H +/* end file src/simdutf/fallback/implementation.h */ - return result(error_code::SUCCESS, utf16_output - start); -} +/* begin file src/simdutf/fallback/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "fallback" +// #define SIMDUTF_IMPLEMENTATION fallback +/* end file src/simdutf/fallback/begin.h */ -} // namespace latin1_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + // Declarations +/* begin file src/simdutf/fallback/bitmanipulation.h */ +#ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H +#define SIMDUTF_FALLBACK_BITMANIPULATION_H -#endif -/* end file src/scalar/latin1_to_utf16/latin1_to_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/latin1_to_utf32/latin1_to_utf32.h */ -#ifndef SIMDUTF_LATIN1_TO_UTF32_H -#define SIMDUTF_LATIN1_TO_UTF32_H +#include namespace simdutf { -namespace scalar { -namespace { -namespace latin1_to_utf32 { +namespace fallback { +namespace {} // unnamed namespace +} // namespace fallback +} // namespace simdutf -inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) { - const unsigned char *data = reinterpret_cast(buf); - char32_t *start{utf32_output}; - for (size_t i = 0; i < len; i++) { - *utf32_output++ = (char32_t)data[i]; - } - return utf32_output - start; -} +#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H +/* end file src/simdutf/fallback/bitmanipulation.h */ -} // namespace latin1_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +/* begin file src/simdutf/fallback/end.h */ +/* end file src/simdutf/fallback/end.h */ +#endif // SIMDUTF_IMPLEMENTATION_FALLBACK +#endif // SIMDUTF_FALLBACK_H +/* end file src/simdutf/fallback.h */ +#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO +SIMDUTF_POP_DISABLE_WARNINGS #endif -/* end file src/scalar/latin1_to_utf32/latin1_to_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf8_to_latin1/utf8_to_latin1.h */ -#ifndef SIMDUTF_UTF8_TO_LATIN1_H -#define SIMDUTF_UTF8_TO_LATIN1_H +// The scalar routines should be included once. +#if SIMDUTF_FEATURE_ASCII +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING || \ + (SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1) +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING || + // (SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1) +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 +#endif // SIMDUTF_FEATURE_BASE64 -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_latin1 { +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -inline size_t convert(const char *buf, size_t len, char *latin_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 - // 1000 1000 .... etc - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = char(buf[pos]); - pos++; - } - continue; - } - } +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - // suppose it is not an all ASCII byte sequence - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *latin_output++ = char(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == - 0b11000000) { // the first three bits indicate: - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } // checks if the next byte is a valid continuation byte in UTF-8. A - // valid continuation byte starts with 10. - // range check - - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | - (data[pos + 1] & - 0b00111111); // assembles the Unicode code point from the two bytes. - // It does this by discarding the leading 110 and 10 - // bits from the two bytes, shifting the remaining bits - // of the first byte, and then combining the results - // with a bitwise OR operation. - if (code_point < 0x80 || 0xFF < code_point) { - return 0; // We only care about the range 129-255 which is Non-ASCII - // latin1 characters. A code_point beneath 0x80 is invalid as - // it is already covered by bytes whose leading bit is zero. - } - *latin_output++ = char(code_point); - pos += 2; - } else { - return 0; - } - } - return latin_output - start; -} +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -inline result convert_with_errors(const char *buf, size_t len, - char *latin_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 - // 1000 1000...etc - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = char(buf[pos]); - pos++; - } - continue; - } - } - // suppose it is not an all ASCII byte sequence - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *latin_output++ = char(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == - 0b11000000) { // the first three bits indicate: - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } // checks if the next byte is a valid continuation byte in UTF-8. A - // valid continuation byte starts with 10. - // range check - - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | - (data[pos + 1] & - 0b00111111); // assembles the Unicode code point from the two bytes. - // It does this by discarding the leading 110 and 10 - // bits from the two bytes, shifting the remaining bits - // of the first byte, and then combining the results - // with a bitwise OR operation. - if (code_point < 0x80) { - return result(error_code::OVERLONG, pos); - } - if (0xFF < code_point) { - return result(error_code::TOO_LARGE, pos); - } // We only care about the range 129-255 which is Non-ASCII latin1 - // characters - *latin_output++ = char(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - return result(error_code::TOO_LARGE, pos); - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - return result(error_code::TOO_LARGE, pos); - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((leading_byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - return result(error_code::HEADER_BITS, pos); - } - } - return result(error_code::SUCCESS, latin_output - start); -} - -inline result rewind_and_convert_with_errors(size_t prior_bytes, - const char *buf, size_t len, - char *latin1_output) { - size_t extra_len{0}; - // We potentially need to go back in time and find a leading byte. - // In theory '3' would be sufficient, but sometimes the error can go back - // quite far. - size_t how_far_back = prior_bytes; - // size_t how_far_back = 3; // 3 bytes in the past + current position - // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } - bool found_leading_bytes{false}; - // important: it is i <= how_far_back and not 'i < how_far_back'. - for (size_t i = 0; i <= how_far_back; i++) { - unsigned char byte = buf[-static_cast(i)]; - found_leading_bytes = ((byte & 0b11000000) != 0b10000000); - if (found_leading_bytes) { - if (i > 0 && byte < 128) { - // If we had to go back and the leading byte is ascii - // then we can stop right away. - return result(error_code::TOO_LONG, 0 - i + 1); - } - buf -= i; - extra_len = i; - break; - } - } - // - // It is possible for this function to return a negative count in its result. - // C++ Standard Section 18.1 defines size_t is in which is described - // in C Standard as . C Standard Section 4.1.5 defines size_t as an - // unsigned integral type of the result of the sizeof operator - // - // An unsigned type will simply wrap round arithmetically (well defined). - // - if (!found_leading_bytes) { - // If how_far_back == 3, we may have four consecutive continuation bytes!!! - // [....] [continuation] [continuation] [continuation] | [buf is - // continuation] Or we possibly have a stream that does not start with a - // leading byte. - return result(error_code::TOO_LONG, 0 - how_far_back); - } - result res = convert_with_errors(buf, len + extra_len, latin1_output); - if (res.error) { - res.count -= extra_len; - } - return res; -} +#if SIMDUTF_FEATURE_UTF8 && \ + (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 -#endif -/* end file src/scalar/utf8_to_latin1/utf8_to_latin1.h */ +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf16_to_latin1/utf16_to_latin1.h */ -#ifndef SIMDUTF_UTF16_TO_LATIN1_H -#define SIMDUTF_UTF16_TO_LATIN1_H - -#include // for std::memcpy - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_latin1 { - -template -inline size_t convert(const char16_t *buf, size_t len, char *latin_output) { - if (len == 0) { - return 0; - } - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *current_write = latin_output; - uint16_t word = 0; - uint16_t too_large = 0; - - while (pos < len) { - word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - too_large |= word; - *current_write++ = char(word & 0xFF); - pos++; - } - if ((too_large & 0xFF00) != 0) { - return 0; - } - - return current_write - latin_output; -} - -template -inline result convert_with_errors(const char16_t *buf, size_t len, - char *latin_output) { - if (len == 0) { - return result(error_code::SUCCESS, 0); - } - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - uint16_t word; - - while (pos < len) { - if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that - // they are Latin1 - uint64_t v1, v2, v3, v4; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - ::memcpy(&v2, data + pos + 4, sizeof(uint64_t)); - ::memcpy(&v3, data + pos + 8, sizeof(uint64_t)); - ::memcpy(&v4, data + pos + 12, sizeof(uint64_t)); - - if simdutf_constexpr (!match_system(big_endian)) { - v1 = (v1 >> 8) | (v1 << (64 - 8)); - } - if simdutf_constexpr (!match_system(big_endian)) { - v2 = (v2 >> 8) | (v2 << (64 - 8)); - } - if simdutf_constexpr (!match_system(big_endian)) { - v3 = (v3 >> 8) | (v3 << (64 - 8)); - } - if simdutf_constexpr (!match_system(big_endian)) { - v4 = (v4 >> 8) | (v4 << (64 - 8)); - } - - if (((v1 | v2 | v3 | v4) & 0xFF00FF00FF00FF00) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = !match_system(big_endian) - ? char(u16_swap_bytes(data[pos])) - : char(data[pos]); - pos++; - } - continue; - } - } - word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF00) == 0) { - *latin_output++ = char(word & 0xFF); - pos++; - } else { - return result(error_code::TOO_LARGE, pos); - } - } - return result(error_code::SUCCESS, latin_output - start); -} - -} // namespace utf16_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_latin1/utf16_to_latin1.h */ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf32_to_latin1/utf32_to_latin1.h */ -#ifndef SIMDUTF_UTF32_TO_LATIN1_H -#define SIMDUTF_UTF32_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_latin1 { - -inline size_t convert(const char32_t *buf, size_t len, char *latin1_output) { - const uint32_t *data = reinterpret_cast(buf); - char *start = latin1_output; - uint32_t utf32_char; - size_t pos = 0; - uint32_t too_large = 0; - - while (pos < len) { - utf32_char = (uint32_t)data[pos]; - too_large |= utf32_char; - *latin1_output++ = (char)(utf32_char & 0xFF); - pos++; - } - if ((too_large & 0xFFFFFF00) != 0) { - return 0; - } - return latin1_output - start; -} - -inline result convert_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const uint32_t *data = reinterpret_cast(buf); - char *start{latin1_output}; - size_t pos = 0; - while (pos < len) { - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are Latin1 - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF00FFFFFF00) == 0) { - *latin1_output++ = char(buf[pos]); - *latin1_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t utf32_char = data[pos]; - if ((utf32_char & 0xFFFFFF00) == - 0) { // Check if the character can be represented in Latin-1 - *latin1_output++ = (char)(utf32_char & 0xFF); - pos++; - } else { - return result(error_code::TOO_LARGE, pos); - }; - } - return result(error_code::SUCCESS, latin1_output - start); -} - -} // namespace utf32_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_latin1/utf32_to_latin1.h */ #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ -#ifndef SIMDUTF_VALID_UTF8_TO_LATIN1_H -#define SIMDUTF_VALID_UTF8_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_latin1 { - -inline size_t convert_valid(const char *buf, size_t len, char *latin_output) { - const uint8_t *data = reinterpret_cast(buf); - - size_t pos = 0; - char *start{latin_output}; - - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = char(buf[pos]); - pos++; - } - continue; - } - } - - // suppose it is not an all ASCII byte sequence - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *latin_output++ = char(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == - 0b11000000) { // the first three bits indicate: - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - break; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } // checks if the next byte is a valid continuation byte in UTF-8. A - // valid continuation byte starts with 10. - // range check - - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | - (data[pos + 1] & - 0b00111111); // assembles the Unicode code point from the two bytes. - // It does this by discarding the leading 110 and 10 - // bits from the two bytes, shifting the remaining bits - // of the first byte, and then combining the results - // with a bitwise OR operation. - *latin_output++ = char(code_point); - pos += 2; - } else { - // we may have a continuation but we do not do error checking - return 0; - } - } - return latin_output - start; -} - -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ -#ifndef SIMDUTF_VALID_UTF16_TO_LATIN1_H -#define SIMDUTF_VALID_UTF16_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_latin1 { - -template -inline size_t convert_valid(const char16_t *buf, size_t len, - char *latin_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - uint16_t word = 0; - - while (pos < len) { - word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - *latin_output++ = char(word); - pos++; - } - - return latin_output - start; -} - -} // namespace utf16_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ -#ifndef SIMDUTF_VALID_UTF32_TO_LATIN1_H -#define SIMDUTF_VALID_UTF32_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_latin1 { - -inline size_t convert_valid(const char32_t *buf, size_t len, - char *latin1_output) { - const uint32_t *data = reinterpret_cast(buf); - char *start = latin1_output; - uint32_t utf32_char; - size_t pos = 0; - - while (pos < len) { - utf32_char = (uint32_t)data[pos]; - - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are Latin1 - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF00FFFFFF00) == 0) { - *latin1_output++ = char(buf[pos]); - *latin1_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } else { - // output can not be represented in latin1 - return 0; - } - } - if ((utf32_char & 0xFFFFFF00) == 0) { - *latin1_output++ = char(utf32_char); - } else { - // output can not be represented in latin1 - return 0; - } - pos++; - } - return latin1_output - start; -} - -} // namespace utf32_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#endif -/* end file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/implementation.cpp */ @@ -16983,113 +12281,7 @@ inline size_t convert_valid(const char32_t *buf, size_t len, #include #include #if SIMDUTF_ATOMIC_REF -/* begin file src/scalar/atomic_util.h */ -#ifndef SIMDUTF_ATOMIC_UTIL_H -#define SIMDUTF_ATOMIC_UTIL_H -#if SIMDUTF_ATOMIC_REF - #include -namespace simdutf { -namespace scalar { - -// This function is a memcpy that uses atomic operations to read from the -// source. -inline void memcpy_atomic_read(char *dst, const char *src, size_t len) { - static_assert(std::atomic_ref::required_alignment == sizeof(char), - "std::atomic_ref requires the same alignment as char_type"); - // We expect all 64-bit systems to be able to read 64-bit words from an - // aligned memory region atomically. You might be able to do better on - // specific systems, e.g., x64 systems can read 128-bit words atomically. - constexpr size_t alignment = sizeof(uint64_t); - - // Lambda for atomic byte-by-byte copy - auto bbb_memcpy_atomic_read = [](char *bytedst, const char *bytesrc, - size_t bytelen) noexcept { - char *mutable_src = const_cast(bytesrc); - for (size_t j = 0; j < bytelen; ++j) { - bytedst[j] = - std::atomic_ref(mutable_src[j]).load(std::memory_order_relaxed); - } - }; - - // Handle unaligned start - size_t offset = reinterpret_cast(src) % alignment; - if (offset) { - size_t to_align = std::min(len, alignment - offset); - bbb_memcpy_atomic_read(dst, src, to_align); - src += to_align; - dst += to_align; - len -= to_align; - } - - // Process aligned 64-bit chunks - while (len >= alignment) { - auto *src_aligned = reinterpret_cast(const_cast(src)); - const auto dst_value = - std::atomic_ref(*src_aligned).load(std::memory_order_relaxed); - std::memcpy(dst, &dst_value, sizeof(uint64_t)); - src += alignment; - dst += alignment; - len -= alignment; - } - - // Handle remaining bytes - if (len) { - bbb_memcpy_atomic_read(dst, src, len); - } -} - -// This function is a memcpy that uses atomic operations to write to the -// destination. -inline void memcpy_atomic_write(char *dst, const char *src, size_t len) { - static_assert(std::atomic_ref::required_alignment == sizeof(char), - "std::atomic_ref requires the same alignment as char"); - // We expect all 64-bit systems to be able to write 64-bit words to an aligned - // memory region atomically. - // You might be able to do better on specific systems, e.g., x64 systems can - // write 128-bit words atomically. - constexpr size_t alignment = sizeof(uint64_t); - - // Lambda for atomic byte-by-byte write - auto bbb_memcpy_atomic_write = [](char *bytedst, const char *bytesrc, - size_t bytelen) noexcept { - for (size_t j = 0; j < bytelen; ++j) { - std::atomic_ref(bytedst[j]) - .store(bytesrc[j], std::memory_order_relaxed); - } - }; - - // Handle unaligned start - size_t offset = reinterpret_cast(dst) % alignment; - if (offset) { - size_t to_align = std::min(len, alignment - offset); - bbb_memcpy_atomic_write(dst, src, to_align); - dst += to_align; - src += to_align; - len -= to_align; - } - - // Process aligned 64-bit chunks - while (len >= alignment) { - auto *dst_aligned = reinterpret_cast(dst); - uint64_t src_val; - std::memcpy(&src_val, src, sizeof(uint64_t)); // Non-atomic read from src - std::atomic_ref(*dst_aligned) - .store(src_val, std::memory_order_relaxed); - dst += alignment; - src += alignment; - len -= alignment; - } - - // Handle remaining bytes - if (len) { - bbb_memcpy_atomic_write(dst, src, len); - } -} -} // namespace scalar -} // namespace simdutf -#endif // SIMDUTF_ATOMIC_REF -#endif // SIMDUTF_ATOMIC_UTIL_H -/* end file src/scalar/atomic_util.h */ + #include #endif static_assert(sizeof(uint8_t) == sizeof(char), @@ -17235,18 +12427,18 @@ static const rvv::implementation *get_rvv_singleton() { return &rvv_singleton; } #endif -#if SIMDUTF_IMPLEMENTATION_LSX -static const lsx::implementation *get_lsx_singleton() { - static const lsx::implementation lsx_singleton{}; - return &lsx_singleton; -} -#endif #if SIMDUTF_IMPLEMENTATION_LASX static const lasx::implementation *get_lasx_singleton() { static const lasx::implementation lasx_singleton{}; return &lasx_singleton; } #endif +#if SIMDUTF_IMPLEMENTATION_LSX +static const lsx::implementation *get_lsx_singleton() { + static const lsx::implementation lsx_singleton{}; + return &lsx_singleton; +} +#endif #if SIMDUTF_IMPLEMENTATION_FALLBACK static const fallback::implementation *get_fallback_singleton() { static const fallback::implementation fallback_singleton{}; @@ -17272,12 +12464,12 @@ static const implementation *get_single_implementation() { #if SIMDUTF_IMPLEMENTATION_PPC64 get_ppc64_singleton(); #endif - #if SIMDUTF_IMPLEMENTATION_LSX - get_lsx_singleton(); - #endif #if SIMDUTF_IMPLEMENTATION_LASX get_lasx_singleton(); #endif + #if SIMDUTF_IMPLEMENTATION_LSX + get_lsx_singleton(); + #endif #if SIMDUTF_IMPLEMENTATION_FALLBACK get_fallback_singleton(); #endif @@ -17899,12 +13091,12 @@ get_available_implementation_pointers() { #if SIMDUTF_IMPLEMENTATION_RVV get_rvv_singleton(), #endif -#if SIMDUTF_IMPLEMENTATION_LSX - get_lsx_singleton(), -#endif #if SIMDUTF_IMPLEMENTATION_LASX get_lasx_singleton(), #endif +#if SIMDUTF_IMPLEMENTATION_LSX + get_lsx_singleton(), +#endif #if SIMDUTF_IMPLEMENTATION_FALLBACK get_fallback_singleton(), #endif @@ -17959,7 +13151,7 @@ class unsupported_implementation final : public implementation { } #endif // SIMDUTF_FEATURE_ASCII -#if SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *, size_t) const noexcept final override { @@ -17971,7 +13163,7 @@ class unsupported_implementation final : public implementation { size_t) const noexcept final override { return false; } -#endif // SIMDUTF_FEATURE_ASCII +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool @@ -18508,7 +13700,7 @@ internal::atomic_ptr &get_default_implementation() { return get_active_implementation(); } #endif -#define SIMDUTF_GET_CURRENT_IMPLEMENTION +#define SIMDUTF_GET_CURRENT_IMPLEMENTATION #if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { @@ -18587,12 +13779,9 @@ simdutf_warn_unused size_t convert_latin1_to_utf32( return get_default_implementation()->convert_latin1_to_utf32(buf, len, latin1_output); } -simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept { - return length; -} -simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept { - return length; -} +// moved to the header file +// simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept +// simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 @@ -18691,7 +13880,9 @@ simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexcept { return get_default_implementation()->validate_utf16le(buf, len); } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_BASE64 #if SIMDUTF_ATOMIC_REF template simdutf_warn_unused result atomic_base64_to_binary_safe_impl( @@ -18781,7 +13972,7 @@ simdutf_warn_unused result atomic_base64_to_binary_safe( } #endif // SIMDUTF_ATOMIC_REF -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#endif // SIMDUTF_FEATURE_BASE64 #if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, @@ -18961,12 +14152,9 @@ simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( return get_default_implementation()->convert_utf16be_to_latin1_with_errors( buf, len, latin1_buffer); } -simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept { - return length; -} -simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept { - return length; -} +// moved to header file +// simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept +// simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 @@ -19247,8 +14435,7 @@ simdutf_warn_unused size_t utf8_length_from_latin1(const char *buf, simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, size_t length) noexcept { #if SIMDUTF_IS_BIG_ENDIAN - result r = utf8_length_from_utf16be_with_replacement(input, length); - return r.count; + return utf8_length_from_utf16be(input, length); #else return utf8_length_from_utf16le(input, length); #endif @@ -19332,23 +14519,22 @@ simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, #if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused size_t -base64_length_from_binary(size_t length, base64_options option) noexcept { - return scalar::base64::base64_length_from_binary(length, option); -} +// this has been moved to implementation.h +// simdutf_warn_unused size_t +// base64_length_from_binary(size_t length, base64_options option) noexcept; -simdutf_warn_unused size_t base64_length_from_binary_with_lines( - size_t length, base64_options options, size_t line_length) noexcept { - return scalar::base64::base64_length_from_binary_with_lines(length, options, - line_length); -} +// this has been moved to implementation.h +// simdutf_warn_unused size_t base64_length_from_binary_with_lines( +// size_t length, base64_options options, size_t line_length) noexcept; +// } -simdutf_warn_unused const char *find(const char *start, const char *end, - char character) noexcept { +simdutf_warn_unused const char *detail::find(const char *start, const char *end, + char character) noexcept { return get_default_implementation()->find(start, end, character); } -simdutf_warn_unused const char16_t * -find(const char16_t *start, const char16_t *end, char16_t character) noexcept { +simdutf_warn_unused const char16_t *detail::find(const char16_t *start, + const char16_t *end, + char16_t character) noexcept { return get_default_implementation()->find(start, end, character); } @@ -19378,166 +14564,22 @@ simdutf_warn_unused result base64_to_binary( input, length, output, options, last_chunk_handling_options); } -template -simdutf_warn_unused result slow_base64_to_binary_safe_impl( - const chartype *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_options) noexcept { - const bool ignore_garbage = (options & base64_default_accept_garbage) != 0; - auto ri = simdutf::scalar::base64::find_end(input, length, options); - size_t equallocation = ri.equallocation; - size_t equalsigns = ri.equalsigns; - length = ri.srclen; - size_t full_input_length = ri.full_input_length; - (void)full_input_length; - if (length == 0) { - outlen = 0; - if (!ignore_garbage && equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - return {SUCCESS, 0}; - } - - // The parameters of base64_tail_decode_safe are: - // - dst: the output buffer - // - outlen: the size of the output buffer - // - srcr: the input buffer - // - length: the size of the input buffer - // - padded_characters: the number of padding characters - // - options: the options for the base64 decoder - // - last_chunk_options: the options for the last chunk - // The function will return the number of bytes written to the output buffer - // and the number of bytes read from the input buffer. - // The function will also return an error code if the input buffer is not - // valid base64. - full_result r = scalar::base64::base64_tail_decode_safe( - output, outlen, input, length, equalsigns, options, last_chunk_options); - r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, - full_input_length, last_chunk_options); - outlen = r.output_count; - if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && - equalsigns > 0) { - // additional checks - if ((outlen % 3 == 0) || ((outlen % 3) + 1 + equalsigns != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; - } - } - return {r.error, r.input_count}; // we cannot return r itself because it gets - // converted to error/output_count -} -simdutf_warn_unused bool base64_ignorable(char input, - base64_options options) noexcept { - return scalar::base64::is_ignorable(input, options); -} -simdutf_warn_unused bool base64_ignorable(char16_t input, - base64_options options) noexcept { - return scalar::base64::is_ignorable(input, options); -} -simdutf_warn_unused bool base64_valid(char input, - base64_options options) noexcept { - return scalar::base64::is_base64(input, options); -} -simdutf_warn_unused bool base64_valid(char16_t input, - base64_options options) noexcept { - return scalar::base64::is_base64(input, options); -} -simdutf_warn_unused bool -base64_valid_or_padding(char input, base64_options options) noexcept { - return scalar::base64::is_base64_or_padding(input, options); -} -simdutf_warn_unused bool -base64_valid_or_padding(char16_t input, base64_options options) noexcept { - return scalar::base64::is_base64_or_padding(input, options); -} +// moved to implementation.h +// simdutf_warn_unused bool base64_ignorable(char input, +// base64_options options) noexcept +// simdutf_warn_unused bool base64_ignorable(char16_t input, +// base64_options options) noexcept +// simdutf_warn_unused bool base64_valid(char input, +// base64_options options) noexcept +// simdutf_warn_unused bool base64_valid(char16_t input, +// base64_options options) noexcept +// simdutf_warn_unused bool +// base64_valid_or_padding(char input, base64_options options) noexcept +// simdutf_warn_unused bool +// base64_valid_or_padding(char16_t input, base64_options options) noexcept -template -simdutf_warn_unused result base64_to_binary_safe_impl( - const chartype *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_handling_options, - bool decode_up_to_bad_char) noexcept { - static_assert(std::is_same::value || - std::is_same::value, - "Only char and char16_t are supported."); - size_t remaining_input_length = length; - size_t remaining_output_length = outlen; - size_t input_position = 0; - size_t output_position = 0; - - // We also do a first pass using the fast path to decode as much as possible - size_t safe_input = (std::min)( - remaining_input_length, - base64_length_from_binary(remaining_output_length / 3 * 3, options)); - bool done_with_partial = (safe_input == remaining_input_length); - simdutf::full_result r = - get_default_implementation()->base64_to_binary_details( - input + input_position, safe_input, output + output_position, options, - done_with_partial - ? last_chunk_handling_options - : simdutf::last_chunk_handling_options::only_full_chunks); - simdutf_log_assert(r.input_count <= safe_input, - "You should not read more than safe_input"); - simdutf_log_assert(r.output_count <= remaining_output_length, - "You should not write more than remaining_output_length"); - // Technically redundant, but we want to be explicit about it. - input_position += r.input_count; - output_position += r.output_count; - remaining_input_length -= r.input_count; - remaining_output_length -= r.output_count; - if (r.error != simdutf::error_code::SUCCESS) { - // There is an error. We return. - if (decode_up_to_bad_char && - r.error == error_code::INVALID_BASE64_CHARACTER) { - return slow_base64_to_binary_safe_impl( - input, length, output, outlen, options, last_chunk_handling_options); - } - outlen = output_position; - return {r.error, input_position}; - } - - if (done_with_partial) { - // We are done. We have decoded everything. - outlen = output_position; - return {simdutf::error_code::SUCCESS, input_position}; - } - // We have decoded some data, but we still have some data to decode. - // We need to decode the rest of the input buffer. - r = simdutf::scalar::base64::base64_to_binary_details_safe_impl( - input + input_position, remaining_input_length, output + output_position, - remaining_output_length, options, last_chunk_handling_options); - input_position += r.input_count; - output_position += r.output_count; - remaining_input_length -= r.input_count; - remaining_output_length -= r.output_count; - - if (r.error != simdutf::error_code::SUCCESS) { - // There is an error. We return. - if (decode_up_to_bad_char && - r.error == error_code::INVALID_BASE64_CHARACTER) { - return slow_base64_to_binary_safe_impl( - input, length, output, outlen, options, last_chunk_handling_options); - } - outlen = output_position; - return {r.error, input_position}; - } - if (input_position < length) { - // We cannot process the entire input in one go, so we need to - // process it in two steps: first the fast path, then the slow path. - // In some cases, the processing might 'eat up' trailing ignorable - // characters in the fast path, but that can be a problem. - // suppose we have just white space followed by a single base64 character. - // If we first process the white space with the fast path, it will - // eat all of it. But, by the JavaScript standard, we should consume - // no character. See - // https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 - while (input_position > 0 && - base64_ignorable(input[input_position - 1], options)) { - input_position--; - } - } - outlen = output_position; - return {simdutf::error_code::SUCCESS, input_position}; -} +// base64_to_binary_safe_impl is moved to +// include/simdutf/base64_implementation.h #if SIMDUTF_ATOMIC_REF size_t atomic_binary_to_base64(const char *input, size_t length, char *output, @@ -27256,33 +22298,7 @@ simdutf_warn_unused size_t implementation::latin1_length_from_utf8( #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::utf8_length_from_latin1( const char *input, size_t length) const noexcept { - size_t answer = length; - size_t i = 0; - auto pop = [](uint64_t v) { - return (size_t)(((v >> 7) & UINT64_C(0x0101010101010101)) * - UINT64_C(0x0101010101010101) >> - 56); - }; - for (; i + 32 <= length; i += 32) { - uint64_t v; - memcpy(&v, input + i, 8); - answer += pop(v); - memcpy(&v, input + i + 8, sizeof(v)); - answer += pop(v); - memcpy(&v, input + i + 16, sizeof(v)); - answer += pop(v); - memcpy(&v, input + i + 24, sizeof(v)); - answer += pop(v); - } - for (; i + 8 <= length; i += 8) { - uint64_t v; - memcpy(&v, input + i, sizeof(v)); - answer += pop(v); - } - for (; i + 1 <= length; i += 1) { - answer += static_cast(input[i]) >> 7; - } - return answer; + return scalar::latin1_to_utf8::utf8_length_from_latin1(input, length); } #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 @@ -33699,7 +28715,7 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf8( /* 0110 */ 1, /* 0111 */ 1, - // continutation bytes + // continuation bytes /* 1000 */ 0, /* 1001 */ 0, /* 1010 */ 0, @@ -44057,9 +39073,9 @@ std::pair ppc64_utf8_length_from_latin1(const char *input, * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). * * AMD XOP specific: http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html - * Altivec has capabilites of AMD XOP (or vice versa): shuffle using 2 vectors + * Altivec has capabilities of AMD XOP (or vice versa): shuffle using 2 vectors * and variable shifts, thus this implementation shares some code solution - * (modulo intrisic function names). + * (modulo intrinsic function names). */ constexpr bool with_base64_std = false; @@ -47467,17 +42483,21 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return convert_impl(ppc64_convert_utf16_to_latin1, - scalar::utf16_to_latin1::convert, buf, - len, latin1_output); + return convert_impl( + ppc64_convert_utf16_to_latin1, + scalar::utf16_to_latin1::convert, + buf, len, latin1_output); } simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return convert_impl(ppc64_convert_utf16_to_latin1, - scalar::utf16_to_latin1::convert, buf, - len, latin1_output); + return convert_impl( + ppc64_convert_utf16_to_latin1, + scalar::utf16_to_latin1::convert, + buf, len, latin1_output); } simdutf_warn_unused result @@ -47486,8 +42506,9 @@ implementation::convert_utf16le_to_latin1_with_errors( return convert_with_errors_impl( ppc64_convert_utf16_to_latin1, - scalar::utf16_to_latin1::convert_with_errors, buf, - len, latin1_output); + scalar::utf16_to_latin1::convert_with_errors, + buf, len, latin1_output); } simdutf_warn_unused result @@ -47496,8 +42517,9 @@ implementation::convert_utf16be_to_latin1_with_errors( return convert_with_errors_impl( ppc64_convert_utf16_to_latin1, - scalar::utf16_to_latin1::convert_with_errors, buf, len, - latin1_output); + scalar::utf16_to_latin1::convert_with_errors, + buf, len, latin1_output); } simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( @@ -47518,16 +42540,18 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { return convert_impl(ppc64_convert_utf16_to_utf8, - scalar::utf16_to_utf8::convert, buf, - len, utf8_output); + scalar::utf16_to_utf8::convert, + buf, len, utf8_output); } simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_impl(ppc64_convert_utf16_to_utf8, - scalar::utf16_to_utf8::convert, buf, len, - utf8_output); + return convert_impl( + ppc64_convert_utf16_to_utf8, + scalar::utf16_to_utf8::convert, + buf, len, utf8_output); } simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( @@ -47586,20 +42610,23 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { return convert_impl(ppc64_convert_utf32_to_utf8, - scalar::utf32_to_utf8::convert, buf, len, utf8_output); + scalar::utf32_to_utf8::convert, + buf, len, utf8_output); } simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( const char32_t *buf, size_t len, char *utf8_output) const noexcept { return convert_with_errors_impl( ppc64_convert_utf32_to_utf8, - scalar::utf32_to_utf8::convert_with_errors, buf, len, utf8_output); + scalar::utf32_to_utf8::convert_with_errors, buf, + len, utf8_output); } simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { return convert_impl(ppc64_convert_utf32_to_utf8, - scalar::utf32_to_utf8::convert, buf, len, utf8_output); + scalar::utf32_to_utf8::convert, + buf, len, utf8_output); } #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 @@ -51751,8 +46778,7 @@ sse_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { // t0 = [000a|aaaa|bbbb|bb00] const __m128i t0 = _mm_slli_epi16(in_16, 2); // shift packed vector by two // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = - _mm_and_si128(t0, v_1f00); // potentital first utf8 byte + const __m128i t1 = _mm_and_si128(t0, v_1f00); // potential first utf8 byte // t2 = [0000|0000|00bb|bbbb] const __m128i t2 = _mm_and_si128(in_16, v_003f); // potential second utf8 byte @@ -57240,25 +52266,31 @@ SIMDUTF_UNTARGET_REGION /* end file src/simdutf/westmere/end.h */ /* end file src/westmere/implementation.cpp */ #endif -#if SIMDUTF_IMPLEMENTATION_LSX -/* begin file src/lsx/implementation.cpp */ -/* begin file src/simdutf/lsx/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "lsx" -// #define SIMDUTF_IMPLEMENTATION lsx +#if SIMDUTF_IMPLEMENTATION_LASX +/* begin file src/lasx/implementation.cpp */ +/* begin file src/simdutf/lasx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lasx" +// #define SIMDUTF_IMPLEMENTATION lasx #define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 -/* end file src/simdutf/lsx/begin.h */ + +#if SIMDUTF_CAN_ALWAYS_RUN_LASX +// nothing needed. +#else +SIMDUTF_TARGET_LASX +#endif +/* end file src/simdutf/lasx/begin.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { -#ifndef SIMDUTF_LSX_H - #error "lsx.h must be included" +#ifndef SIMDUTF_LASX_H + #error "lasx.h must be included" #endif using namespace simd; #if SIMDUTF_FEATURE_UTF8 // convert vmskltz/vmskgez/vmsknz to // simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes index -const uint8_t lsx_1_2_utf8_bytes_mask[] = { +const uint8_t lasx_1_2_utf8_bytes_mask[] = { 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, @@ -57283,6 +52315,9 @@ const uint8_t lsx_1_2_utf8_bytes_mask[] = { simdutf_really_inline __m128i lsx_swap_bytes(__m128i vec) { return __lsx_vshuf4i_b(vec, 0b10110001); } +simdutf_really_inline __m256i lasx_swap_bytes(__m256i vec) { + return __lasx_xvshuf4i_b(vec, 0b10110001); +} #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ @@ -57353,7 +52388,7 @@ convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_h(0x7f)); // 6 or 7 bits // 1 byte: 00000000 00000000 // 2 byte: 00000aaa aa000000 - const __m128i v1f00 = lsx_splat_u16(0x1f00); + __m128i v1f00 = lsx_splat_u16(0x1f00); __m128i composed = __lsx_vsrli_h(__lsx_vand_v(perm, v1f00), 2); // 5 bits // Combine with a shift right accumulate // 1 byte: 00000000 0bbbbbbb @@ -57365,7 +52400,7 @@ convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { // SIMDUTF_FEATURE_UTF32) #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/lsx/lsx_validate_utf16.cpp */ +/* begin file src/lasx/lasx_validate_utf16.cpp */ template simd8 utf16_gather_high_bytes(const simd16 in0, const simd16 in1) { @@ -57379,101 +52414,117 @@ simd8 utf16_gather_high_bytes(const simd16 in0, return simd16::pack_shifted_right<8>(in0, in1); } } -/* end file src/lsx/lsx_validate_utf16.cpp */ +/* end file src/lasx/lasx_validate_utf16.cpp */ #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING #if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/lsx/lsx_validate_utf32le.cpp */ -const char32_t *lsx_validate_utf32le(const char32_t *input, size_t size) { +/* begin file src/lasx/lasx_validate_utf32le.cpp */ +const char32_t *lasx_validate_utf32le(const char32_t *input, size_t size) { const char32_t *end = input + size; - __m128i offset = lsx_splat_u32(0xffff2000); - __m128i standardoffsetmax = lsx_splat_u32(0xfffff7ff); - __m128i standardmax = lsx_splat_u32(0x10ffff); - __m128i currentmax = lsx_splat_u32(0); - __m128i currentoffsetmax = lsx_splat_u32(0); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)input & 0x1F) && input < end) { + uint32_t word = *input++; + if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { + return nullptr; + } + } - while (input + 4 < end) { - __m128i in = __lsx_vld(reinterpret_cast(input), 0); - currentmax = __lsx_vmax_wu(in, currentmax); + __m256i offset = lasx_splat_u32(0xffff2000); + __m256i standardoffsetmax = lasx_splat_u32(0xfffff7ff); + __m256i standardmax = lasx_splat_u32(0x10ffff); + __m256i currentmax = __lasx_xvldi(0x0); + __m256i currentoffsetmax = __lasx_xvldi(0x0); + + while (input + 8 < end) { + __m256i in = __lasx_xvld(reinterpret_cast(input), 0); + currentmax = __lasx_xvmax_wu(in, currentmax); // 0xD8__ + 0x2000 = 0xF8__ => 0xF8__ > 0xF7FF currentoffsetmax = - __lsx_vmax_wu(__lsx_vadd_w(in, offset), currentoffsetmax); - - input += 4; + __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax); + input += 8; } - - __m128i is_zero = - __lsx_vxor_v(__lsx_vmax_wu(currentmax, standardmax), standardmax); - if (__lsx_bnz_v(is_zero)) { + __m256i is_zero = + __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax); + if (__lasx_xbnz_v(is_zero)) { return nullptr; } - is_zero = __lsx_vxor_v(__lsx_vmax_wu(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (__lsx_bnz_v(is_zero)) { + is_zero = __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lasx_xbnz_v(is_zero)) { return nullptr; } - return input; } -const result lsx_validate_utf32le_with_errors(const char32_t *input, - size_t size) { +const result lasx_validate_utf32le_with_errors(const char32_t *input, + size_t size) { const char32_t *start = input; const char32_t *end = input + size; - __m128i offset = lsx_splat_u32(0xffff2000); - __m128i standardoffsetmax = lsx_splat_u32(0xfffff7ff); - __m128i standardmax = lsx_splat_u32(0x10ffff); - __m128i currentmax = lsx_splat_u32(0); - __m128i currentoffsetmax = lsx_splat_u32(0); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)input & 0x1F) && input < end) { + uint32_t word = *input; + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, input - start); + } + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, input - start); + } + input++; + } - while (input + 4 < end) { - __m128i in = __lsx_vld(reinterpret_cast(input), 0); - currentmax = __lsx_vmax_wu(in, currentmax); + __m256i offset = lasx_splat_u32(0xffff2000); + __m256i standardoffsetmax = lasx_splat_u32(0xfffff7ff); + __m256i standardmax = lasx_splat_u32(0x10ffff); + __m256i currentmax = __lasx_xvldi(0x0); + __m256i currentoffsetmax = __lasx_xvldi(0x0); + + while (input + 8 < end) { + __m256i in = __lasx_xvld(reinterpret_cast(input), 0); + currentmax = __lasx_xvmax_wu(in, currentmax); currentoffsetmax = - __lsx_vmax_wu(__lsx_vadd_w(in, offset), currentoffsetmax); + __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax); - __m128i is_zero = - __lsx_vxor_v(__lsx_vmax_wu(currentmax, standardmax), standardmax); - if (__lsx_bnz_v(is_zero)) { + __m256i is_zero = + __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax); + if (__lasx_xbnz_v(is_zero)) { return result(error_code::TOO_LARGE, input - start); } - - is_zero = __lsx_vxor_v(__lsx_vmax_wu(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (__lsx_bnz_v(is_zero)) { + is_zero = + __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lasx_xbnz_v(is_zero)) { return result(error_code::SURROGATE, input - start); } - - input += 4; + input += 8; } return result(error_code::SUCCESS, input - start); } -/* end file src/lsx/lsx_validate_utf32le.cpp */ +/* end file src/lasx/lasx_validate_utf32le.cpp */ #endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_latin1_to_utf8.cpp */ +/* begin file src/lasx/lasx_convert_latin1_to_utf8.cpp */ /* Returns a pair: the first unprocessed byte from buf and utf8_output A scalar routing should carry on the conversion of the tail. */ std::pair -lsx_convert_latin1_to_utf8(const char *latin1_input, size_t len, - char *utf8_out) { +lasx_convert_latin1_to_utf8(const char *latin1_input, size_t len, + char *utf8_out) { uint8_t *utf8_output = reinterpret_cast(utf8_out); + const size_t safety_margin = 12; const char *end = latin1_input + len; - __m128i zero = __lsx_vldi(0); // We always write 16 bytes, of which more than the first 8 bytes // are valid. A safety margin of 8 is more than sufficient. - while (end - latin1_input >= 16) { + while (end - latin1_input >= std::ptrdiff_t(16 + safety_margin)) { __m128i in8 = __lsx_vld(reinterpret_cast(latin1_input), 0); - uint32_t ascii = __lsx_vpickve2gr_hu(__lsx_vmskgez_b(in8), 0); - if (ascii == 0xffff) { // ASCII fast path!!!! + uint32_t ascii_mask = __lsx_vpickve2gr_wu(__lsx_vmskgez_b(in8), 0); + if (ascii_mask == 0xFFFF) { __lsx_vst(in8, utf8_output, 0); utf8_output += 16; latin1_input += 16; @@ -57481,48 +52532,76 @@ lsx_convert_latin1_to_utf8(const char *latin1_input, size_t len, } // We just fallback on UTF-16 code. This could be optimized/simplified // further. - __m128i in16 = __lsx_vilvl_b(zero, in8); + __m256i in16 = __lasx_vext2xv_hu_bu(____m256i(in8)); // 1. prepare 2-byte values - // input 8-bit word : [aabb|bbbb] x 8 - // expected output : [1100|00aa|10bb|bbbb] x 8 + // input 8-bit word : [aabb|bbbb] x 16 + // expected output : [1100|00aa|10bb|bbbb] x 16 // t0 = [0000|00aa|bbbb|bb00] - __m128i t0 = __lsx_vslli_h(in16, 2); + __m256i t0 = __lasx_xvslli_h(in16, 2); // t1 = [0000|00aa|0000|0000] - __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x300)); + __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x300)); // t3 = [0000|00aa|00bb|bbbb] - __m128i t2 = __lsx_vbitsel_v(t1, in16, __lsx_vrepli_h(0x3f)); + __m256i t2 = __lasx_xvbitsel_v(t1, in16, __lasx_xvrepli_h(0x3f)); // t4 = [1100|00aa|10bb|bbbb] - __m128i t3 = __lsx_vor_v(t2, __lsx_vreplgr2vr_h(uint16_t(0xc080))); + __m256i t3 = __lasx_xvor_v(t2, __lasx_xvreplgr2vr_h(uint16_t(0xc080))); // merge ASCII and 2-byte codewords - __m128i one_byte_bytemask = __lsx_vsle_hu(in16, __lsx_vrepli_h(0x7F)); - __m128i utf8_unpacked = __lsx_vbitsel_v(t3, in16, one_byte_bytemask); + __m256i one_byte_bytemask = __lasx_xvsle_hu(in16, __lasx_xvrepli_h(0x7F)); + __m256i utf8_unpacked = __lasx_xvbitsel_v(t3, in16, one_byte_bytemask); - const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lsx_1_2_utf8_bytes_mask[(ascii & 0xff)]][0]; - __m128i shuffle = __lsx_vld(row + 1, 0); - __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[(ascii_mask & 0xFF)]][0]; + __m128i shuffle0 = __lsx_vld(row0 + 1, 0); + __m128i utf8_unpacked_lo = lasx_extracti128_lo(utf8_unpacked); + __m128i utf8_packed0 = + __lsx_vshuf_b(utf8_unpacked_lo, utf8_unpacked_lo, shuffle0); + __lsx_vst(utf8_packed0, utf8_output, 0); + utf8_output += row0[0]; - // store bytes - __lsx_vst(utf8_packed, utf8_output, 0); - // adjust pointers - latin1_input += 8; - utf8_output += row[0]; + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[(ascii_mask >> 8)]][0]; + __m128i shuffle1 = __lsx_vld(row1 + 1, 0); + __m128i utf8_unpacked_hi = lasx_extracti128_hi(utf8_unpacked); + __m128i utf8_packed1 = + __lsx_vshuf_b(utf8_unpacked_hi, utf8_unpacked_hi, shuffle1); + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; + latin1_input += 16; } // while return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); } -/* end file src/lsx/lsx_convert_latin1_to_utf8.cpp */ +/* end file src/lasx/lasx_convert_latin1_to_utf8.cpp */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_latin1_to_utf16.cpp */ +/* begin file src/lasx/lasx_convert_latin1_to_utf16.cpp */ std::pair -lsx_convert_latin1_to_utf16le(const char *buf, size_t len, - char16_t *utf16_output) { +lasx_convert_latin1_to_utf16le(const char *buf, size_t len, + char16_t *utf16_output) { const char *end = buf + len; - __m128i zero = __lsx_vldi(0); - while (end - buf >= 16) { + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + *utf16_output++ = uint8_t(*buf) & 0xFF; + buf++; + } + + while (end - buf >= 32) { + __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); + + __m256i inlow = __lasx_vext2xv_hu_bu(in8); + __m256i in8_high = __lasx_xvpermi_q(in8, in8, 0b00000001); + __m256i inhigh = __lasx_vext2xv_hu_bu(in8_high); + __lasx_xvst(inlow, reinterpret_cast(utf16_output), 0); + __lasx_xvst(inhigh, reinterpret_cast(utf16_output), 32); + + utf16_output += 32; + buf += 32; + } + + if (end - buf >= 16) { + __m128i zero = __lsx_vldi(0); __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); __m128i inlow = __lsx_vilvl_b(zero, in8); @@ -57533,20 +52612,38 @@ lsx_convert_latin1_to_utf16le(const char *buf, size_t len, utf16_output += 16; buf += 16; } - return std::make_pair(buf, utf16_output); } std::pair -lsx_convert_latin1_to_utf16be(const char *buf, size_t len, - char16_t *utf16_output) { +lasx_convert_latin1_to_utf16be(const char *buf, size_t len, + char16_t *utf16_output) { const char *end = buf + len; - __m128i zero = __lsx_vldi(0); - while (end - buf >= 16) { + + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + *utf16_output++ = char16_t((uint16_t(*buf++) << 8)); + } + + __m256i zero = __lasx_xvldi(0); + while (end - buf >= 32) { + __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); + + __m256i in8_shuf = __lasx_xvpermi_d(in8, 0b11011000); + + __m256i inlow = __lasx_xvilvl_b(in8_shuf, zero); + __m256i inhigh = __lasx_xvilvh_b(in8_shuf, zero); + __lasx_xvst(inlow, reinterpret_cast(utf16_output), 0); + __lasx_xvst(inhigh, reinterpret_cast(utf16_output), 32); + utf16_output += 32; + buf += 32; + } + + if (end - buf >= 16) { + __m128i zero_128 = __lsx_vldi(0); __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); - __m128i inlow = __lsx_vilvl_b(in8, zero); - __m128i inhigh = __lsx_vilvh_b(in8, zero); + __m128i inlow = __lsx_vilvl_b(in8, zero_128); + __m128i inhigh = __lsx_vilvh_b(in8, zero_128); __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); utf16_output += 16; @@ -57555,16 +52652,44 @@ lsx_convert_latin1_to_utf16be(const char *buf, size_t len, return std::make_pair(buf, utf16_output); } -/* end file src/lsx/lsx_convert_latin1_to_utf16.cpp */ +/* end file src/lasx/lasx_convert_latin1_to_utf16.cpp */ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_latin1_to_utf32.cpp */ +/* begin file src/lasx/lasx_convert_latin1_to_utf32.cpp */ std::pair -lsx_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { +lasx_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { const char *end = buf + len; - while (end - buf >= 16) { + // LASX requires 32-byte alignment, otherwise performance will be degraded + while (((uint64_t)utf32_output & 0x1F) && buf < end) { + *utf32_output++ = ((uint32_t)*buf) & 0xFF; + buf++; + } + + while (end - buf >= 32) { + __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); + + __m256i in32_0 = __lasx_vext2xv_wu_bu(in8); + __lasx_xvst(in32_0, reinterpret_cast(utf32_output), 0); + + __m256i in8_1 = __lasx_xvpermi_d(in8, 0b00000001); + __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); + __lasx_xvst(in32_1, reinterpret_cast(utf32_output), 32); + + __m256i in8_2 = __lasx_xvpermi_d(in8, 0b00000010); + __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); + __lasx_xvst(in32_2, reinterpret_cast(utf32_output), 64); + + __m256i in8_3 = __lasx_xvpermi_d(in8, 0b00000011); + __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); + __lasx_xvst(in32_3, reinterpret_cast(utf32_output), 96); + + utf32_output += 32; + buf += 32; + } + + if (end - buf >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); __m128i zero = __lsx_vldi(0); @@ -57576,9 +52701,9 @@ lsx_convert_latin1_to_utf32(const char *buf, size_t len, __m128i in32_3 = __lsx_vilvh_h(zero, in16high); __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); - __lsx_vst(in32_1, reinterpret_cast(utf32_output + 4), 0); - __lsx_vst(in32_2, reinterpret_cast(utf32_output + 8), 0); - __lsx_vst(in32_3, reinterpret_cast(utf32_output + 12), 0); + __lsx_vst(in32_1, reinterpret_cast(utf32_output), 16); + __lsx_vst(in32_2, reinterpret_cast(utf32_output), 32); + __lsx_vst(in32_3, reinterpret_cast(utf32_output), 48); utf32_output += 16; buf += 16; @@ -57586,11 +52711,11 @@ lsx_convert_latin1_to_utf32(const char *buf, size_t len, return std::make_pair(buf, utf32_output); } -/* end file src/lsx/lsx_convert_latin1_to_utf32.cpp */ +/* end file src/lasx/lasx_convert_latin1_to_utf32.cpp */ #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/lsx/lsx_convert_utf8_to_utf16.cpp */ +/* begin file src/lasx/lasx_convert_utf8_to_utf16.cpp */ // Convert up to 16 bytes from utf8 to utf16 using a mask indicating the // end of the code points. Only the least significant 12 bits of the mask // are accessed. @@ -57615,15 +52740,22 @@ size_t convert_masked_utf8_to_utf16(const char *input, // We first try a few fast paths. // The obvious first test is ASCII, which actually consumes the full 16. if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { - // We process in chunks of 16 bytes - // The routine in simd.h is reused. - simd8 temp{in}; - temp.store_ascii_as_utf16(utf16_output); + __m128i zero = __lsx_vldi(0); + if simdutf_constexpr (match_system(big_endian)) { + __lsx_vst(__lsx_vilvl_b(zero, in), + reinterpret_cast(utf16_output), 0); + __lsx_vst(__lsx_vilvh_b(zero, in), + reinterpret_cast(utf16_output), 16); + } else { + __lsx_vst(__lsx_vilvl_b(in, zero), + reinterpret_cast(utf16_output), 0); + __lsx_vst(__lsx_vilvh_b(in, zero), + reinterpret_cast(utf16_output), 16); + } utf16_output += 16; // We wrote 16 16-bit characters. return 16; // We consumed 16 bytes. } - uint64_t buffer[2]; // 3 byte sequences are the next most common, as seen in CJK, which has long // sequences of these. if (input_utf8_end_of_code_point_mask == 0x924) { @@ -57651,8 +52783,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, } __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 6; // We wrote 6 16-bit characters. - return 12; // We consumed 12 bytes. + utf16_output += 8; // We wrote 6 16-bit characters. + return 16; // We consumed 12 bytes. } /// We do not have a fast path available, or the fast path is unimportant, so @@ -57724,13 +52856,6 @@ size_t convert_masked_utf8_to_utf16(const char *input, } else if (idx < 209) { // THREE (3) input code-code units if (input_utf8_end_of_code_point_mask == 0x888) { - // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte - // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but - // it is easier when we can assume they are all pairs. This version does - // not use the LUT, but 4 byte sequences are less common and the overhead - // of the extra memory access is less important than the early branch - // overhead in shorter sequences. - __m128i expected_mask = (__m128i)v16u8{0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, 0x0, 0x0, 0x0, 0x0}; @@ -57740,6 +52865,13 @@ size_t convert_masked_utf8_to_utf16(const char *input, __m128i check = __lsx_vseq_b(__lsx_vand_v(in, expected_mask), expected); if (__lsx_bz_b(check)) return 12; + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but + // it is easier when we can assume they are all pairs. This version does + // not use the LUT, but 4 byte sequences are less common and the overhead + // of the extra memory access is less important than the early branch + // overhead in shorter sequences. + // Swap byte pairs // 10dddddd 10cccccc|10bbbbbb 11110aaa // 10cccccc 10dddddd|11110aaa 10bbbbbb @@ -57757,7 +52889,7 @@ size_t convert_masked_utf8_to_utf16(const char *input, __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xDC00E7C0)); // Generate unadjusted trail surrogate minus lowest 2 bits // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 - __m128i trail = __lsx_vbitsel_v(shift, swap, lsx_splat_u32(0x0000ff00)); + __m128i trail = __lsx_vbitsel_v(shift, swap, lsx_splat_u32(0x0000FF00)); // Insert low 2 bits of trail surrogate to magic number for later // 11011100 00000000 11100111 110000cc __m128i magic_with_low_2 = __lsx_vor_v(__lsx_vsrli_w(shift, 30), magic); @@ -57780,9 +52912,7 @@ size_t convert_masked_utf8_to_utf16(const char *input, if simdutf_constexpr (!match_system(big_endian)) { composed = lsx_swap_bytes(composed); } - // __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - __lsx_vst(composed, reinterpret_cast(buffer), 0); - std::memcpy(utf16_output, buffer, 12); + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); utf16_output += 6; // We 3 32-bit surrogate pairs. return 12; // We consumed 12 bytes. } @@ -57882,10 +53012,10 @@ size_t convert_masked_utf8_to_utf16(const char *input, return 12; } } -/* end file src/lsx/lsx_convert_utf8_to_utf16.cpp */ +/* end file src/lasx/lasx_convert_utf8_to_utf16.cpp */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lsx/lsx_convert_utf8_to_utf32.cpp */ +/* begin file src/lasx/lasx_convert_utf8_to_utf32.cpp */ // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the // end of the code points. Only the least significant 12 bits of the mask // are accessed. @@ -57912,8 +53042,19 @@ size_t convert_masked_utf8_to_utf32(const char *input, // We process in chunks of 16 bytes. // use fast implementation in src/simdutf/arm64/simd.h // Ideally the compiler can keep the tables in registers. - simd8 temp{in}; - temp.store_ascii_as_utf32_tbl(utf32_out); + __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, in); + __m128i in16high = __lsx_vilvh_b(zero, in); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); + + __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); + __lsx_vst(in32_1, reinterpret_cast(utf32_output), 16); + __lsx_vst(in32_2, reinterpret_cast(utf32_output), 32); + __lsx_vst(in32_3, reinterpret_cast(utf32_output), 48); + utf32_output += 16; // We wrote 16 32-bit characters. return 16; // We consumed 16 bytes. } @@ -57942,7 +53083,7 @@ size_t convert_masked_utf8_to_utf32(const char *input, utf32_output += 6; return 12; // We consumed 12 bytes. } - /// Either no fast path or an unimportant fast path. + // Either no fast path or an unimportant fast path. const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex [input_utf8_end_of_code_point_mask][0]; @@ -58065,10 +53206,10 @@ size_t convert_masked_utf8_to_utf32(const char *input, return 12; } } -/* end file src/lsx/lsx_convert_utf8_to_utf32.cpp */ +/* end file src/lasx/lasx_convert_utf8_to_utf32.cpp */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_utf8_to_latin1.cpp */ +/* begin file src/lasx/lasx_convert_utf8_to_latin1.cpp */ size_t convert_masked_utf8_to_latin1(const char *input, uint64_t utf8_end_of_code_point_mask, char *&latin1_output) { @@ -58137,22 +53278,19 @@ size_t convert_masked_utf8_to_latin1(const char *input, // writing 8 bytes even though we only care about the first 6 bytes. __m128i latin1_packed = __lsx_vpickev_b(__lsx_vldi(0), composed); - uint64_t buffer[2]; - // __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - __lsx_vst(latin1_packed, reinterpret_cast(buffer), 0); - std::memcpy(latin1_output, buffer, 6); + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); latin1_output += 6; // We wrote 6 bytes. return consumed; } -/* end file src/lsx/lsx_convert_utf8_to_latin1.cpp */ +/* end file src/lasx/lasx_convert_utf8_to_latin1.cpp */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_utf16_to_latin1.cpp */ +/* begin file src/lasx/lasx_convert_utf16_to_latin1.cpp */ template std::pair -lsx_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { +lasx_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { const char16_t *end = buf + len; while (end - buf >= 16) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); @@ -58178,8 +53316,8 @@ lsx_convert_utf16_to_latin1(const char16_t *buf, size_t len, template std::pair -lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { +lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { const char16_t *start = buf; const char16_t *end = buf + len; while (end - buf >= 16) { @@ -58213,12 +53351,12 @@ lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output); } -/* end file src/lsx/lsx_convert_utf16_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 -/* begin file src/lsx/lsx_convert_utf16_to_utf8.cpp */ +/* end file src/lasx/lasx_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/lasx/lasx_convert_utf16_to_utf8.cpp */ /* - The vectorized algorithm works on single SSE register i.e., it + The vectorized algorithm works on single LASX register i.e., it loads eight 16-bit code units. We consider three cases: @@ -58236,7 +53374,7 @@ lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, char) or 2) two UTF8 bytes. For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single + codes and finally compress the whole LASX register with a single shuffle. We need 256-entry lookup table to get a compression pattern @@ -58254,7 +53392,7 @@ lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, the three-UTF8-bytes case. Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. + array of 32-bit values. The array spans two LASX registers. The bytes from the registers are compressed using two shuffles. We need 256-entry lookup table to get a compression pattern @@ -58269,9 +53407,10 @@ lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, Returns a pair: the first unprocessed byte from buf and utf8_output A scalar routing should carry on the conversion of the tail. */ + template std::pair -lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { +lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { uint8_t *utf8_output = reinterpret_cast(utf8_out); const char16_t *end = buf + len; @@ -58279,82 +53418,77 @@ lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); + __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); if simdutf_constexpr (!match_system(big_endian)) { - in = lsx_swap_bytes(in); + in = lasx_swap_bytes(in); } - if (__lsx_bz_v( - __lsx_vslt_hu(__lsx_vrepli_h(0x7F), in))) { // ASCII fast path!!!! - // It is common enough that we have sequences of 16 consecutive ASCII - // characters. - __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); - if simdutf_constexpr (!match_system(big_endian)) { - nextin = lsx_swap_bytes(nextin); - } - if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), nextin))) { - // 1. pack the bytes - // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(nextin, in); - // 2. store (16 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } else { - // 1. pack the bytes - // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(in, in); - // 2. store (8 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } + if (__lasx_xbnz_h(__lasx_xvslt_hu( + in, __lasx_xvrepli_h(0x7F)))) { // ASCII fast path!!!! + // 1. pack the bytes + __m256i utf8_packed = + __lasx_xvpermi_d(__lasx_xvpickev_b(in, in), 0b00001000); + // 2. store (16 bytes) + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! } - __m128i zero = __lsx_vldi(0); - if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, in))) { + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, in))) { // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 16 + // expected output : [110a|aaaa|10bb|bbbb] x 16 // t0 = [000a|aaaa|bbbb|bb00] - __m128i t0 = __lsx_vslli_h(in, 2); + __m256i t0 = __lasx_xvslli_h(in, 2); // t1 = [000a|aaaa|0000|0000] - __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); + __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); // t2 = [0000|0000|00bb|bbbb] - __m128i t2 = __lsx_vand_v(in, __lsx_vrepli_h(0x3f)); + __m256i t2 = __lasx_xvand_v(in, __lasx_xvrepli_h(0x3f)); // t3 = [000a|aaaa|00bb|bbbb] - __m128i t3 = __lsx_vor_v(t1, t2); + __m256i t3 = __lasx_xvor_v(t1, t2); // t4 = [110a|aaaa|10bb|bbbb] - __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xc080)); - __m128i t4 = __lsx_vor_v(t3, v_c080); + __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xc080)); + __m256i t4 = __lasx_xvor_v(t3, v_c080); // 2. merge ASCII and 2-byte codewords - __m128i one_byte_bytemask = - __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F /*0x007F*/)); - __m128i utf8_unpacked = __lsx_vbitsel_v(t4, in, one_byte_bytemask); + __m256i one_byte_bytemask = + __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = __lasx_xvbitsel_v(t4, in, one_byte_bytemask); // 3. prepare bitmask for 8-bit lookup - uint32_t m2 = __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); // 4. pack the bytes - const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lsx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle = __lsx_vld(row, 1); - __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); // 5. store bytes - __lsx_vst(utf8_packed, utf8_output, 0); - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; + + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; + + buf += 16; continue; } - __m128i surrogates_bytemask = __lsx_vseq_h( - __lsx_vand_v(in, lsx_splat_u16(0xf800)), lsx_splat_u16(0xd800)); + __m256i surrogates_bytemask = __lasx_xvseq_h( + __lasx_xvand_v(in, lasx_splat_u16(0xf800)), lasx_splat_u16(0xd800)); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (__lsx_bz_v(surrogates_bytemask)) { + if (__lasx_xbz_v(surrogates_bytemask)) { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes /* In this branch we handle three cases: 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - @@ -58384,74 +53518,94 @@ lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m128i t0 = __lsx_vpickev_b(in, in); - t0 = __lsx_vilvl_b(t0, t0); + __m256i t0 = __lasx_xvpickev_b(in, in); + t0 = __lasx_xvilvl_b(t0, t0); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] - __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); - __m128i t1 = __lsx_vand_v(t0, v_3f7f); + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); + __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m128i s0 = __lsx_vsrli_h(in, 12); + __m256i s0 = __lasx_xvsrli_h(in, 12); // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m128i s1 = __lsx_vslli_h(in, 2); + __m256i s1 = __lasx_xvslli_h(in, 2); // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] - s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3f00)); + s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); // [00bb|bbbb|0000|aaaa] - __m128i s2 = __lsx_vor_v(s0, s1); + __m256i s2 = __lasx_xvor_v(s0, s1); // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); - __m128i s3 = __lsx_vor_v(s2, v_c0e0); - __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(in, v_07ff); - __m128i m0 = - __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); - __m128i s4 = __lsx_vxor_v(s3, m0); + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + __m256i one_or_two_bytes_bytemask = __lasx_xvsle_hu(in, v_07ff); + __m256i m0 = + __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); + __m256i s4 = __lasx_xvxor_v(s3, m0); // 4. expand code units 16-bit => 32-bit - __m128i out0 = __lsx_vilvl_h(s4, t2); - __m128i out1 = __lsx_vilvh_h(s4, t2); + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m128i one_byte_bytemask = __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F)); - - __m128i one_or_two_bytes_bytemask_low = - __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); - __m128i one_or_two_bytes_bytemask_high = - __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + __m256i one_byte_bytemask = __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F)); + __m256i one_byte_bytemask_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); - __m128i one_byte_bytemask_low = - __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); - __m128i one_byte_bytemask_high = - __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_or_two_bytes_bytemask_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); - const uint32_t mask0 = __lsx_vpickve2gr_bu( - __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_low, - one_byte_bytemask_low)), - 0); - const uint32_t mask1 = __lsx_vpickve2gr_bu( - __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_high, - one_byte_bytemask_high)), - 0); + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_low, one_byte_bytemask_low)); + __m256i mask1 = __lasx_xvmskltz_h(__lasx_xvor_v( + one_or_two_bytes_bytemask_high, one_byte_bytemask_high)); + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + mask = __lasx_xvpickve2gr_wu(mask1, 0); const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); - - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); __lsx_vst(utf8_1, utf8_output, 0); utf8_output += row1[0]; - buf += 8; + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -58506,8 +53660,8 @@ lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { */ template std::pair -lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, - char *utf8_out) { +lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_out) { uint8_t *utf8_output = reinterpret_cast(utf8_out); const char16_t *start = buf; const char16_t *end = buf + len; @@ -58515,82 +53669,78 @@ lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 + + __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); if simdutf_constexpr (!match_system(big_endian)) { - in = lsx_swap_bytes(in); + in = lasx_swap_bytes(in); } - if (__lsx_bz_v( - __lsx_vslt_hu(__lsx_vrepli_h(0x7F), in))) { // ASCII fast path!!!! - // It is common enough that we have sequences of 16 consecutive ASCII - // characters. - __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); - if simdutf_constexpr (!match_system(big_endian)) { - nextin = lsx_swap_bytes(nextin); - } - if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), nextin))) { - // 1. pack the bytes - // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(nextin, in); - // 2. store (16 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } else { - // 1. pack the bytes - // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(in, in); - // 2. store (8 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } + if (__lasx_xbnz_h(__lasx_xvslt_hu( + in, __lasx_xvrepli_h(0x7F)))) { // ASCII fast path!!!! + // 1. pack the bytes + __m256i utf8_packed = + __lasx_xvpermi_d(__lasx_xvpickev_b(in, in), 0b00001000); + // 2. store (16 bytes) + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! } - __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); - __m128i zero = __lsx_vldi(0); - if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, in))) { + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, in))) { // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 16 + // expected output : [110a|aaaa|10bb|bbbb] x 16 // t0 = [000a|aaaa|bbbb|bb00] - __m128i t0 = __lsx_vslli_h(in, 2); + __m256i t0 = __lasx_xvslli_h(in, 2); // t1 = [000a|aaaa|0000|0000] - __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); + __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); // t2 = [0000|0000|00bb|bbbb] - __m128i t2 = __lsx_vand_v(in, __lsx_vrepli_h(0x3f)); + __m256i t2 = __lasx_xvand_v(in, __lasx_xvrepli_h(0x3f)); // t3 = [000a|aaaa|00bb|bbbb] - __m128i t3 = __lsx_vor_v(t1, t2); + __m256i t3 = __lasx_xvor_v(t1, t2); // t4 = [110a|aaaa|10bb|bbbb] - __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xc080)); - __m128i t4 = __lsx_vor_v(t3, v_c080); + __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xc080)); + __m256i t4 = __lasx_xvor_v(t3, v_c080); // 2. merge ASCII and 2-byte codewords - __m128i one_byte_bytemask = - __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F /*0x007F*/)); - __m128i utf8_unpacked = __lsx_vbitsel_v(t4, in, one_byte_bytemask); + __m256i one_byte_bytemask = + __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = __lasx_xvbitsel_v(t4, in, one_byte_bytemask); // 3. prepare bitmask for 8-bit lookup - uint32_t m2 = __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); // 4. pack the bytes - const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lsx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle = __lsx_vld(row, 1); - __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); // 5. store bytes - __lsx_vst(utf8_packed, utf8_output, 0); - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; + + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; + + buf += 16; continue; } - __m128i surrogates_bytemask = __lsx_vseq_h( - __lsx_vand_v(in, lsx_splat_u16(0xf800)), lsx_splat_u16(0xd800)); + __m256i surrogates_bytemask = __lasx_xvseq_h( + __lasx_xvand_v(in, lasx_splat_u16(0xf800)), lasx_splat_u16(0xd800)); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (__lsx_bz_v(surrogates_bytemask)) { + if (__lasx_xbz_v(surrogates_bytemask)) { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes /* In this branch we handle three cases: 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - @@ -58620,74 +53770,94 @@ lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m128i t0 = __lsx_vpickev_b(in, in); - t0 = __lsx_vilvl_b(t0, t0); + __m256i t0 = __lasx_xvpickev_b(in, in); + t0 = __lasx_xvilvl_b(t0, t0); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] - __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); - __m128i t1 = __lsx_vand_v(t0, v_3f7f); + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); + __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m128i s0 = __lsx_vsrli_h(in, 12); + __m256i s0 = __lasx_xvsrli_h(in, 12); // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m128i s1 = __lsx_vslli_h(in, 2); + __m256i s1 = __lasx_xvslli_h(in, 2); // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] - s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3f00)); + s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); // [00bb|bbbb|0000|aaaa] - __m128i s2 = __lsx_vor_v(s0, s1); + __m256i s2 = __lasx_xvor_v(s0, s1); // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); - __m128i s3 = __lsx_vor_v(s2, v_c0e0); - __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(in, v_07ff); - __m128i m0 = - __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); - __m128i s4 = __lsx_vxor_v(s3, m0); + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + __m256i one_or_two_bytes_bytemask = __lasx_xvsle_hu(in, v_07ff); + __m256i m0 = + __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); + __m256i s4 = __lasx_xvxor_v(s3, m0); // 4. expand code units 16-bit => 32-bit - __m128i out0 = __lsx_vilvl_h(s4, t2); - __m128i out1 = __lsx_vilvh_h(s4, t2); + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m128i one_byte_bytemask = __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F)); - - __m128i one_or_two_bytes_bytemask_low = - __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); - __m128i one_or_two_bytes_bytemask_high = - __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + __m256i one_byte_bytemask = __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F)); + __m256i one_byte_bytemask_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); - __m128i one_byte_bytemask_low = - __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); - __m128i one_byte_bytemask_high = - __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_or_two_bytes_bytemask_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); - const uint32_t mask0 = __lsx_vpickve2gr_bu( - __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_low, - one_byte_bytemask_low)), - 0); - const uint32_t mask1 = __lsx_vpickve2gr_bu( - __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_high, - one_byte_bytemask_high)), - 0); + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_low, one_byte_bytemask_low)); + __m256i mask1 = __lasx_xvmskltz_h(__lasx_xvor_v( + one_or_two_bytes_bytemask_high, one_byte_bytemask_high)); + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + mask = __lasx_xvpickve2gr_wu(mask1, 0); const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); - - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); __lsx_vst(utf8_1, utf8_output, 0); utf8_output += row1[0]; - buf += 8; + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -58735,38 +53905,63 @@ lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf8_output)); } -/* end file src/lsx/lsx_convert_utf16_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 +/* end file src/lasx/lasx_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lsx/lsx_convert_utf16_to_utf32.cpp */ +/* begin file src/lasx/lasx_convert_utf16_to_utf32.cpp */ template std::pair -lsx_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_out) { +lasx_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_out) { uint32_t *utf32_output = reinterpret_cast(utf32_out); const char16_t *end = buf + len; - __m128i zero = __lsx_vldi(0); - __m128i v_f800 = lsx_splat_u16(0xf800); - __m128i v_d800 = lsx_splat_u16(0xd800); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf32_output & 0x1f) && buf < end) { + uint16_t word = scalar::utf16::swap_if_needed(buf[0]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + buf++; + } else { + if (buf + 1 >= end) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = scalar::utf16::swap_if_needed(buf[1]); + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + buf += 2; + } + } - while (end - buf >= 8) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m256i v_f800 = lasx_splat_u16(0xf800); + __m256i v_d800 = lasx_splat_u16(0xd800); + + while (end - buf >= 16) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); if simdutf_constexpr (!match_system(big_endian)) { - in = lsx_swap_bytes(in); + in = lasx_swap_bytes(in); } - __m128i surrogates_bytemask = - __lsx_vseq_h(__lsx_vand_v(in, v_f800), v_d800); + __m256i surrogates_bytemask = + __lasx_xvseq_h(__lasx_xvand_v(in, v_f800), v_d800); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (__lsx_bz_v(surrogates_bytemask)) { + if (__lasx_xbz_v(surrogates_bytemask)) { // case: no surrogate pairs, extend all 16-bit code units to 32-bit code // units - __lsx_vst(__lsx_vilvl_h(zero, in), utf32_output, 0); - __lsx_vst(__lsx_vilvh_h(zero, in), utf32_output, 16); - utf32_output += 8; - buf += 8; + __m256i in_hi = __lasx_xvpermi_q(in, in, 0b00000001); + __lasx_xvst(__lasx_vext2xv_wu_hu(in), utf32_output, 0); + __lasx_xvst(__lasx_vext2xv_wu_hu(in_hi), utf32_output, 32); + utf32_output += 16; + buf += 16; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -58811,31 +54006,56 @@ lsx_convert_utf16_to_utf32(const char16_t *buf, size_t len, */ template std::pair -lsx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_out) { +lasx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_out) { uint32_t *utf32_output = reinterpret_cast(utf32_out); const char16_t *start = buf; const char16_t *end = buf + len; - __m128i zero = __lsx_vldi(0); - __m128i v_f800 = lsx_splat_u16(0xf800); - __m128i v_d800 = lsx_splat_u16(0xd800); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf32_output & 0x1f) && buf < end) { + uint16_t word = scalar::utf16::swap_if_needed(buf[0]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + buf++; + } else if (buf + 1 < end) { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = scalar::utf16::swap_if_needed(buf[1]); + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + buf += 2; + } else { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf32_output)); + } + } - while (end - buf >= 8) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m256i v_f800 = lasx_splat_u16(0xf800); + __m256i v_d800 = lasx_splat_u16(0xd800); + while (end - buf >= 16) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); if simdutf_constexpr (!match_system(big_endian)) { - in = lsx_swap_bytes(in); + in = lasx_swap_bytes(in); } - __m128i surrogates_bytemask = - __lsx_vseq_h(__lsx_vand_v(in, v_f800), v_d800); - if (__lsx_bz_v(surrogates_bytemask)) { + __m256i surrogates_bytemask = + __lasx_xvseq_h(__lasx_xvand_v(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lasx_xbz_v(surrogates_bytemask)) { // case: no surrogate pairs, extend all 16-bit code units to 32-bit code // units - __lsx_vst(__lsx_vilvl_h(zero, in), utf32_output, 0); - __lsx_vst(__lsx_vilvh_h(zero, in), utf32_output, 16); - utf32_output += 8; - buf += 8; + __m256i in_hi = __lasx_xvpermi_q(in, in, 0b00000001); + __lasx_xvst(__lasx_vext2xv_wu_hu(in), utf32_output, 0); + __lasx_xvst(__lasx_vext2xv_wu_hu(in_hi), utf32_output, 32); + utf32_output += 16; + buf += 16; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -58872,31 +54092,35 @@ lsx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf32_output)); } -/* end file src/lsx/lsx_convert_utf16_to_utf32.cpp */ +/* end file src/lasx/lasx_convert_utf16_to_utf32.cpp */ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_utf32_to_latin1.cpp */ +/* begin file src/lasx/lasx_convert_utf32_to_latin1.cpp */ std::pair -lsx_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { +lasx_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { const char32_t *end = buf + len; - const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; - __m128i v_ff = __lsx_vrepli_w(0xFF); + const __m256i shuf_mask = ____m256i( + (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); + __m256i v_ff = __lasx_xvrepli_w(0xFF); while (end - buf >= 16) { - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); - __m128i in12 = __lsx_vor_v(in1, in2); - if (__lsx_bz_v(__lsx_vslt_wu(v_ff, in12))) { + __m256i in12 = __lasx_xvor_v(in1, in2); + if (__lasx_xbz_v(__lasx_xvslt_wu(v_ff, in12))) { // 1. pack the bytes - __m128i latin1_packed = __lsx_vshuf_b(in2, in1, (__m128i)shuf_mask); + __m256i latin1_packed_tmp = __lasx_xvshuf_b(in2, in1, shuf_mask); + latin1_packed_tmp = __lasx_xvpermi_d(latin1_packed_tmp, 0b00001000); + __m128i latin1_packed = lasx_extracti128_lo(latin1_packed_tmp); + latin1_packed = __lsx_vpermi_w(latin1_packed, latin1_packed, 0b11011000); // 2. store (8 bytes) __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); // 3. adjust pointers - buf += 8; - latin1_output += 8; + buf += 16; + latin1_output += 16; } else { return std::make_pair(nullptr, reinterpret_cast(latin1_output)); } @@ -58905,31 +54129,34 @@ lsx_convert_utf32_to_latin1(const char32_t *buf, size_t len, } std::pair -lsx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { +lasx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { const char32_t *start = buf; const char32_t *end = buf + len; - const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; - __m128i v_ff = __lsx_vrepli_w(0xFF); + const __m256i shuf_mask = ____m256i( + (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); + __m256i v_ff = __lasx_xvrepli_w(0xFF); while (end - buf >= 16) { - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); - - __m128i in12 = __lsx_vor_v(in1, in2); + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); - if (__lsx_bz_v(__lsx_vslt_wu(v_ff, in12))) { + __m256i in12 = __lasx_xvor_v(in1, in2); + if (__lasx_xbz_v(__lasx_xvslt_wu(v_ff, in12))) { // 1. pack the bytes - __m128i latin1_packed = __lsx_vshuf_b(in2, in1, (__m128i)shuf_mask); + __m256i latin1_packed_tmp = __lasx_xvshuf_b(in2, in1, shuf_mask); + latin1_packed_tmp = __lasx_xvpermi_d(latin1_packed_tmp, 0b00001000); + __m128i latin1_packed = lasx_extracti128_lo(latin1_packed_tmp); + latin1_packed = __lsx_vpermi_w(latin1_packed, latin1_packed, 0b11011000); // 2. store (8 bytes) __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); // 3. adjust pointers - buf += 8; - latin1_output += 8; + buf += 16; + latin1_output += 16; } else { // Let us do a scalar fallback. - for (int k = 0; k < 8; k++) { + for (int k = 0; k < 16; k++) { uint32_t word = buf[k]; if (word <= 0xff) { *latin1_output++ = char(word); @@ -58943,188 +54170,253 @@ lsx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output); } -/* end file src/lsx/lsx_convert_utf32_to_latin1.cpp */ +/* end file src/lasx/lasx_convert_utf32_to_latin1.cpp */ #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lsx/lsx_convert_utf32_to_utf8.cpp */ +/* begin file src/lasx/lasx_convert_utf32_to_utf8.cpp */ std::pair -lsx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { +lasx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { uint8_t *utf8_output = reinterpret_cast(utf8_out); const char32_t *end = buf + len; - __m128i v_c080 = lsx_splat_u16(0xc080); - __m128i v_07ff = lsx_splat_u16(0x07ff); - __m128i v_dfff = lsx_splat_u16(0xdfff); - __m128i v_d800 = lsx_splat_u16(0xd800); - __m128i forbidden_bytemask = __lsx_vldi(0x0); + // load addr align 32 + while (((uint64_t)buf & 0x1F) && buf < end) { + uint32_t word = *buf; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + buf++; + } + + __m256i v_c080 = lasx_splat_u16(0xc080); + __m256i v_07ff = lasx_splat_u16(0x07ff); + __m256i v_dfff = lasx_splat_u16(0xdfff); + __m256i v_d800 = lasx_splat_u16(0xd800); + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); + __m256i forbidden_bytemask = __lasx_xvldi(0x0); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 while (end - buf > std::ptrdiff_t(16 + safety_margin)) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); // Check if no bits set above 16th - if (__lsx_bz_v(__lsx_vpickod_h(in, nextin))) { + if (__lasx_xbz_v(__lasx_xvpickod_h(in, nextin))) { // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (lsx_convert_utf16_to_utf8.cpp) - __m128i utf16_packed = __lsx_vpickev_h(nextin, in); + // Apply UTF-16 => UTF-8 routine (lasx_convert_utf16_to_utf8.cpp) + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(nextin, in), 0b11011000); - if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), - utf16_packed))) { // ASCII fast path!!!! + if (__lasx_xbz_v(__lasx_xvslt_hu(__lasx_xvrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! // 1. pack the bytes // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(utf16_packed, utf16_packed); + __m256i utf8_packed = __lasx_xvpermi_d( + __lasx_xvpickev_b(utf16_packed, utf16_packed), 0b00001000); // 2. store (8 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); // 3. adjust pointers - buf += 8; - utf8_output += 8; + buf += 16; + utf8_output += 16; continue; // we are done for this round! } - __m128i zero = __lsx_vldi(0); - if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, utf16_packed))) { + + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, utf16_packed))) { // 1. prepare 2-byte values // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 // expected output : [110a|aaaa|10bb|bbbb] x 8 // t0 = [000a|aaaa|bbbb|bb00] - const __m128i t0 = __lsx_vslli_h(utf16_packed, 2); + const __m256i t0 = __lasx_xvslli_h(utf16_packed, 2); // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); + const __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); // t2 = [0000|0000|00bb|bbbb] - const __m128i t2 = __lsx_vand_v(utf16_packed, __lsx_vrepli_h(0x3f)); + const __m256i t2 = __lasx_xvand_v(utf16_packed, __lasx_xvrepli_h(0x3f)); // t3 = [000a|aaaa|00bb|bbbb] - const __m128i t3 = __lsx_vor_v(t1, t2); + const __m256i t3 = __lasx_xvor_v(t1, t2); // t4 = [110a|aaaa|10bb|bbbb] - const __m128i t4 = __lsx_vor_v(t3, v_c080); + const __m256i t4 = __lasx_xvor_v(t3, v_c080); // 2. merge ASCII and 2-byte codewords - __m128i one_byte_bytemask = - __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F /*0x007F*/)); - __m128i utf8_unpacked = - __lsx_vbitsel_v(t4, utf16_packed, one_byte_bytemask); + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = + __lasx_xvbitsel_v(t4, utf16_packed, one_byte_bytemask); // 3. prepare bitmask for 8-bit lookup - uint32_t m2 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); // 4. pack the bytes - const uint8_t *row = + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lsx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle = __lsx_vld(row, 1); - __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = __lsx_vshuf_b( + zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = __lsx_vshuf_b( + zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); // 5. store bytes - __lsx_vst(utf8_packed, utf8_output, 0); + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; + + buf += 16; continue; } else { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - forbidden_bytemask = __lsx_vor_v( - __lsx_vand_v( - __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 forbidden_bytemask); /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single - UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three - UTF-8 bytes + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. + We precompute byte 1 for case #1 and the common byte for cases #2 & + #3 in register t2. - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ /** * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: * t2 => [0ccc|cccc] [10cc|cccc] * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m128i t0 = __lsx_vpickev_b(utf16_packed, utf16_packed); - t0 = __lsx_vilvl_b(t0, t0); + __m256i t0 = __lasx_xvpickev_b(utf16_packed, utf16_packed); + t0 = __lasx_xvilvl_b(t0, t0); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); - __m128i t1 = __lsx_vand_v(t0, v_3f7f); + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); + __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m128i s0 = __lsx_vsrli_h(utf16_packed, 12); + __m256i s0 = __lasx_xvsrli_h(utf16_packed, 12); // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m128i s1 = __lsx_vslli_h(utf16_packed, 2); + __m256i s1 = __lasx_xvslli_h(utf16_packed, 2); // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3F00)); + s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); // [00bb|bbbb|0000|aaaa] - __m128i s2 = __lsx_vor_v(s0, s1); + __m256i s2 = __lasx_xvor_v(s0, s1); // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); - __m128i s3 = __lsx_vor_v(s2, v_c0e0); - __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(utf16_packed, v_07ff); - __m128i m0 = - __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); - __m128i s4 = __lsx_vxor_v(s3, m0); + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + // __m256i v_07ff = vmovq_n_u16((uint16_t)0x07FF); + __m256i one_or_two_bytes_bytemask = + __lasx_xvsle_hu(utf16_packed, v_07ff); + __m256i m0 = + __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); + __m256i s4 = __lasx_xvxor_v(s3, m0); // 4. expand code units 16-bit => 32-bit - __m128i out0 = __lsx_vilvl_h(s4, t2); - __m128i out1 = __lsx_vilvh_h(s4, t2); + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m128i one_byte_bytemask = - __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F)); + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F)); - __m128i one_or_two_bytes_bytemask_u16_to_u32_low = - __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); - __m128i one_or_two_bytes_bytemask_u16_to_u32_high = - __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); - __m128i one_byte_bytemask_u16_to_u32_low = - __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); - __m128i one_byte_bytemask_u16_to_u32_high = - __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); - const uint32_t mask0 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( - one_or_two_bytes_bytemask_u16_to_u32_low, - one_byte_bytemask_u16_to_u32_low)), - 0); - const uint32_t mask1 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( - one_or_two_bytes_bytemask_u16_to_u32_high, - one_byte_bytemask_u16_to_u32_high)), - 0); + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)); + __m256i mask1 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)); + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + mask = __lasx_xvpickve2gr_wu(mask1, 0); const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); - - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); __lsx_vst(utf8_1, utf8_output, 0); utf8_output += row1[0]; - buf += 8; + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; } // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> // will produce four UTF-8 bytes. @@ -59168,197 +54460,262 @@ lsx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { } // while // check for invalid input - if (__lsx_bnz_v(forbidden_bytemask)) { + if (__lasx_xbnz_v(forbidden_bytemask)) { return std::make_pair(nullptr, reinterpret_cast(utf8_output)); } - return std::make_pair(buf, reinterpret_cast(utf8_output)); } std::pair -lsx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_out) { +lasx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_out) { uint8_t *utf8_output = reinterpret_cast(utf8_out); const char32_t *start = buf; const char32_t *end = buf + len; - __m128i v_c080 = lsx_splat_u16(0xc080); - __m128i v_07ff = lsx_splat_u16(0x07ff); - __m128i v_dfff = lsx_splat_u16(0xdfff); - __m128i v_d800 = lsx_splat_u16(0xd800); - __m128i forbidden_bytemask = __lsx_vldi(0x0); + // load addr align 32 + while (((uint64_t)buf & 0x1F) && buf < end) { + uint32_t word = *buf; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + buf++; + } + + __m256i v_c080 = lasx_splat_u16(0xc080); + __m256i v_07ff = lasx_splat_u16(0x07ff); + __m256i v_dfff = lasx_splat_u16(0xdfff); + __m256i v_d800 = lasx_splat_u16(0xd800); + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); + __m256i forbidden_bytemask = __lasx_xvldi(0x0); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 while (end - buf > std::ptrdiff_t(16 + safety_margin)) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); // Check if no bits set above 16th - if (__lsx_bz_v(__lsx_vpickod_h(in, nextin))) { + if (__lasx_xbz_v(__lasx_xvpickod_h(in, nextin))) { // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (lsx_convert_utf16_to_utf8.cpp) - __m128i utf16_packed = __lsx_vpickev_h(nextin, in); + // Apply UTF-16 => UTF-8 routine (lasx_convert_utf16_to_utf8.cpp) + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(nextin, in), 0b11011000); - if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), - utf16_packed))) { // ASCII fast path!!!! + if (__lasx_xbz_v(__lasx_xvslt_hu(__lasx_xvrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! // 1. pack the bytes // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(utf16_packed, utf16_packed); + __m256i utf8_packed = __lasx_xvpermi_d( + __lasx_xvpickev_b(utf16_packed, utf16_packed), 0b00001000); // 2. store (8 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); // 3. adjust pointers - buf += 8; - utf8_output += 8; + buf += 16; + utf8_output += 16; continue; // we are done for this round! } - __m128i zero = __lsx_vldi(0); - if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, utf16_packed))) { + + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, utf16_packed))) { // 1. prepare 2-byte values // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 // expected output : [110a|aaaa|10bb|bbbb] x 8 // t0 = [000a|aaaa|bbbb|bb00] - const __m128i t0 = __lsx_vslli_h(utf16_packed, 2); + const __m256i t0 = __lasx_xvslli_h(utf16_packed, 2); // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); + const __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); // t2 = [0000|0000|00bb|bbbb] - const __m128i t2 = __lsx_vand_v(utf16_packed, __lsx_vrepli_h(0x3f)); + const __m256i t2 = __lasx_xvand_v(utf16_packed, __lasx_xvrepli_h(0x3f)); // t3 = [000a|aaaa|00bb|bbbb] - const __m128i t3 = __lsx_vor_v(t1, t2); + const __m256i t3 = __lasx_xvor_v(t1, t2); // t4 = [110a|aaaa|10bb|bbbb] - const __m128i t4 = __lsx_vor_v(t3, v_c080); + const __m256i t4 = __lasx_xvor_v(t3, v_c080); // 2. merge ASCII and 2-byte codewords - __m128i one_byte_bytemask = - __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F /*0x007F*/)); - __m128i utf8_unpacked = - __lsx_vbitsel_v(t4, utf16_packed, one_byte_bytemask); + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = + __lasx_xvbitsel_v(t4, utf16_packed, one_byte_bytemask); // 3. prepare bitmask for 8-bit lookup - uint32_t m2 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); // 4. pack the bytes - const uint8_t *row = + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lsx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle = __lsx_vld(row, 1); - __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = __lsx_vshuf_b( + zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = __lsx_vshuf_b( + zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); // 5. store bytes - __lsx_vst(utf8_packed, utf8_output, 0); + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; + + buf += 16; continue; } else { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - forbidden_bytemask = __lsx_vor_v( - __lsx_vand_v( - __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 forbidden_bytemask); - if (__lsx_bnz_v(forbidden_bytemask)) { + if (__lasx_xbnz_v(forbidden_bytemask)) { return std::make_pair(result(error_code::SURROGATE, buf - start), reinterpret_cast(utf8_output)); } /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single - UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three - UTF-8 bytes + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. + We precompute byte 1 for case #1 and the common byte for cases #2 & + #3 in register t2. - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ /** * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: * t2 => [0ccc|cccc] [10cc|cccc] * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m128i t0 = __lsx_vpickev_b(utf16_packed, utf16_packed); - t0 = __lsx_vilvl_b(t0, t0); + __m256i t0 = __lasx_xvpickev_b(utf16_packed, utf16_packed); + t0 = __lasx_xvilvl_b(t0, t0); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); - __m128i t1 = __lsx_vand_v(t0, v_3f7f); + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); + __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m128i s0 = __lsx_vsrli_h(utf16_packed, 12); + __m256i s0 = __lasx_xvsrli_h(utf16_packed, 12); // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m128i s1 = __lsx_vslli_h(utf16_packed, 2); + __m256i s1 = __lasx_xvslli_h(utf16_packed, 2); // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3F00)); + s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3F00)); // [00bb|bbbb|0000|aaaa] - __m128i s2 = __lsx_vor_v(s0, s1); + __m256i s2 = __lasx_xvor_v(s0, s1); // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); - __m128i s3 = __lsx_vor_v(s2, v_c0e0); - // __m128i v_07ff = vmovq_n_u16((uint16_t)0x07FF); - __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(utf16_packed, v_07ff); - __m128i m0 = - __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); - __m128i s4 = __lsx_vxor_v(s3, m0); + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + // __m256i v_07ff = vmovq_n_u16((uint16_t)0x07FF); + __m256i one_or_two_bytes_bytemask = + __lasx_xvsle_hu(utf16_packed, v_07ff); + __m256i m0 = + __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); + __m256i s4 = __lasx_xvxor_v(s3, m0); // 4. expand code units 16-bit => 32-bit - __m128i out0 = __lsx_vilvl_h(s4, t2); - __m128i out1 = __lsx_vilvh_h(s4, t2); + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m128i one_byte_bytemask = - __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F)); + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F)); - __m128i one_or_two_bytes_bytemask_u16_to_u32_low = - __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); - __m128i one_or_two_bytes_bytemask_u16_to_u32_high = - __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); - __m128i one_byte_bytemask_u16_to_u32_low = - __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); - __m128i one_byte_bytemask_u16_to_u32_high = - __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); - const uint32_t mask0 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( - one_or_two_bytes_bytemask_u16_to_u32_low, - one_byte_bytemask_u16_to_u32_low)), - 0); - const uint32_t mask1 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( - one_or_two_bytes_bytemask_u16_to_u32_high, - one_byte_bytemask_u16_to_u32_high)), - 0); + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)); + __m256i mask1 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)); + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + mask = __lasx_xvpickve2gr_wu(mask1, 0); const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); - - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); __lsx_vst(utf8_1, utf8_output, 0); utf8_output += row1[0]; - buf += 8; + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; } // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> // will produce four UTF-8 bytes. @@ -59406,41 +54763,74 @@ lsx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf8_output)); } -/* end file src/lsx/lsx_convert_utf32_to_utf8.cpp */ +/* end file src/lasx/lasx_convert_utf32_to_utf8.cpp */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lsx/lsx_convert_utf32_to_utf16.cpp */ +/* begin file src/lasx/lasx_convert_utf32_to_utf16.cpp */ template std::pair -lsx_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_out) { +lasx_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_out) { uint16_t *utf16_output = reinterpret_cast(utf16_out); const char32_t *end = buf + len; - __m128i forbidden_bytemask = __lsx_vrepli_h(0); - __m128i v_d800 = lsx_splat_u16(0xd800); - __m128i v_dfff = lsx_splat_u16(0xdfff); - while (end - buf >= 8) { - __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + uint32_t word = *buf++; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + // buf++; + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + // buf++; + } + } + + __m256i forbidden_bytemask = __lasx_xvrepli_h(0); + __m256i v_d800 = lasx_splat_u16(0xd800); + __m256i v_dfff = lasx_splat_u16(0xdfff); + while (end - buf >= 16) { + __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); // Check if no bits set above 16th - if (__lsx_bz_v(__lsx_vpickod_h(in1, in0))) { - __m128i utf16_packed = __lsx_vpickev_h(in1, in0); - forbidden_bytemask = __lsx_vor_v( - __lsx_vand_v( - __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + if (__lasx_xbz_v(__lasx_xvpickod_h(in1, in0))) { + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(in1, in0), 0b11011000); + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 forbidden_bytemask); if simdutf_constexpr (!match_system(big_endian)) { - utf16_packed = lsx_swap_bytes(utf16_packed); + utf16_packed = lasx_swap_bytes(utf16_packed); } - __lsx_vst(utf16_packed, utf16_output, 0); - utf16_output += 8; - buf += 8; + __lasx_xvst(utf16_packed, utf16_output, 0); + utf16_output += 16; + buf += 16; } else { - size_t forward = 3; + size_t forward = 15; size_t k = 0; if (size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1); @@ -59479,7 +54869,7 @@ lsx_convert_utf32_to_utf16(const char32_t *buf, size_t len, } // check for invalid input - if (__lsx_bnz_v(forbidden_bytemask)) { + if (__lasx_xbnz_v(forbidden_bytemask)) { return std::make_pair(nullptr, reinterpret_cast(utf16_output)); } return std::make_pair(buf, reinterpret_cast(utf16_output)); @@ -59487,42 +54877,72 @@ lsx_convert_utf32_to_utf16(const char32_t *buf, size_t len, template std::pair -lsx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_out) { +lasx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_out) { uint16_t *utf16_output = reinterpret_cast(utf16_out); const char32_t *start = buf; const char32_t *end = buf + len; - __m128i forbidden_bytemask = __lsx_vrepli_h(0); - __m128i v_d800 = lsx_splat_u16(0xd800); - __m128i v_dfff = lsx_splat_u16(0xdfff); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + uint32_t word = *buf++; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(result(error_code::SURROGATE, buf - start - 1), + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start - 1), + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } - while (end - buf >= 8) { - __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); - // Check if no bits set above 16th - if (__lsx_bz_v(__lsx_vpickod_h(in1, in0))) { - __m128i utf16_packed = __lsx_vpickev_h(in1, in0); + __m256i forbidden_bytemask = __lasx_xvrepli_h(0); + __m256i v_d800 = lasx_splat_u16(0xd800); + __m256i v_dfff = lasx_splat_u16(0xdfff); + while (end - buf >= 16) { + __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); - forbidden_bytemask = __lsx_vor_v( - __lsx_vand_v( - __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + // Check if no bits set above 16th + if (__lasx_xbz_v(__lasx_xvpickod_h(in1, in0))) { + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(in1, in0), 0b11011000); + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 forbidden_bytemask); - if (__lsx_bnz_v(forbidden_bytemask)) { + if (__lasx_xbnz_v(forbidden_bytemask)) { return std::make_pair(result(error_code::SURROGATE, buf - start), reinterpret_cast(utf16_output)); } if simdutf_constexpr (!match_system(big_endian)) { - utf16_packed = lsx_swap_bytes(utf16_packed); + utf16_packed = lasx_swap_bytes(utf16_packed); } - __lsx_vst(utf16_packed, utf16_output, 0); - utf16_output += 8; - buf += 8; + __lasx_xvst(utf16_packed, utf16_output, 0); + utf16_output += 16; + buf += 16; } else { - size_t forward = 3; + size_t forward = 15; size_t k = 0; if (size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1); @@ -59565,10 +54985,10 @@ lsx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf16_output)); } -/* end file src/lsx/lsx_convert_utf32_to_utf16.cpp */ +/* end file src/lasx/lasx_convert_utf32_to_utf16.cpp */ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_BASE64 -/* begin file src/lsx/lsx_base64.cpp */ +/* begin file src/lasx/lasx_base64.cpp */ /** * References and further reading: * @@ -59609,100 +55029,118 @@ size_t encode_base64(char *dst, const char *src, size_t srclen, : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; uint8_t *out = (uint8_t *)dst; - v16u8 shuf; - __m128i v_fc0fc00, v_3f03f0, shift_r, shift_l, base64_tbl0, base64_tbl1, + v32u8 shuf; + __m256i v_fc0fc00, v_3f03f0, shift_r, shift_l, base64_tbl0, base64_tbl1, base64_tbl2, base64_tbl3; - if (srclen >= 16) { - shuf = v16u8{1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10}; - v_fc0fc00 = __lsx_vreplgr2vr_w(uint32_t(0x0fc0fc00)); - v_3f03f0 = __lsx_vreplgr2vr_w(uint32_t(0x003f03f0)); - shift_r = __lsx_vreplgr2vr_w(uint32_t(0x0006000a)); - shift_l = __lsx_vreplgr2vr_w(uint32_t(0x00080004)); - base64_tbl0 = __lsx_vld(lookup_tbl, 0); - base64_tbl1 = __lsx_vld(lookup_tbl, 16); - base64_tbl2 = __lsx_vld(lookup_tbl, 32); - base64_tbl3 = __lsx_vld(lookup_tbl, 48); - } + if (srclen >= 28) { + shuf = v32u8{1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10, + 1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10}; + v_fc0fc00 = __lasx_xvreplgr2vr_w(uint32_t(0x0fc0fc00)); + v_3f03f0 = __lasx_xvreplgr2vr_w(uint32_t(0x003f03f0)); + shift_r = __lasx_xvreplgr2vr_w(uint32_t(0x0006000a)); + shift_l = __lasx_xvreplgr2vr_w(uint32_t(0x00080004)); + base64_tbl0 = ____m256i(__lsx_vld(lookup_tbl, 0)); + base64_tbl1 = ____m256i(__lsx_vld(lookup_tbl, 16)); + base64_tbl2 = ____m256i(__lsx_vld(lookup_tbl, 32)); + base64_tbl3 = ____m256i(__lsx_vld(lookup_tbl, 48)); + } size_t i = 0; - for (; i + 52 <= srclen; i += 48) { - __m128i in0 = + for (; i + 100 <= srclen; i += 96) { + __m128i in0_lo = __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 0); - __m128i in1 = + __m128i in0_hi = __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); - __m128i in2 = + __m128i in1_lo = __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 2); - __m128i in3 = + __m128i in1_hi = __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 3); + __m128i in2_lo = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 4); + __m128i in2_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 5); + __m128i in3_lo = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 6); + __m128i in3_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 7); - in0 = __lsx_vshuf_b(in0, in0, (__m128i)shuf); - in1 = __lsx_vshuf_b(in1, in1, (__m128i)shuf); - in2 = __lsx_vshuf_b(in2, in2, (__m128i)shuf); - in3 = __lsx_vshuf_b(in3, in3, (__m128i)shuf); + __m256i in0 = lasx_set_q(in0_hi, in0_lo); + __m256i in1 = lasx_set_q(in1_hi, in1_lo); + __m256i in2 = lasx_set_q(in2_hi, in2_lo); + __m256i in3 = lasx_set_q(in3_hi, in3_lo); - __m128i t0_0 = __lsx_vand_v(in0, v_fc0fc00); - __m128i t0_1 = __lsx_vand_v(in1, v_fc0fc00); - __m128i t0_2 = __lsx_vand_v(in2, v_fc0fc00); - __m128i t0_3 = __lsx_vand_v(in3, v_fc0fc00); + in0 = __lasx_xvshuf_b(in0, in0, (__m256i)shuf); + in1 = __lasx_xvshuf_b(in1, in1, (__m256i)shuf); + in2 = __lasx_xvshuf_b(in2, in2, (__m256i)shuf); + in3 = __lasx_xvshuf_b(in3, in3, (__m256i)shuf); - __m128i t1_0 = __lsx_vsrl_h(t0_0, shift_r); - __m128i t1_1 = __lsx_vsrl_h(t0_1, shift_r); - __m128i t1_2 = __lsx_vsrl_h(t0_2, shift_r); - __m128i t1_3 = __lsx_vsrl_h(t0_3, shift_r); + __m256i t0_0 = __lasx_xvand_v(in0, v_fc0fc00); + __m256i t0_1 = __lasx_xvand_v(in1, v_fc0fc00); + __m256i t0_2 = __lasx_xvand_v(in2, v_fc0fc00); + __m256i t0_3 = __lasx_xvand_v(in3, v_fc0fc00); - __m128i t2_0 = __lsx_vand_v(in0, v_3f03f0); - __m128i t2_1 = __lsx_vand_v(in1, v_3f03f0); - __m128i t2_2 = __lsx_vand_v(in2, v_3f03f0); - __m128i t2_3 = __lsx_vand_v(in3, v_3f03f0); + __m256i t1_0 = __lasx_xvsrl_h(t0_0, shift_r); + __m256i t1_1 = __lasx_xvsrl_h(t0_1, shift_r); + __m256i t1_2 = __lasx_xvsrl_h(t0_2, shift_r); + __m256i t1_3 = __lasx_xvsrl_h(t0_3, shift_r); - __m128i t3_0 = __lsx_vsll_h(t2_0, shift_l); - __m128i t3_1 = __lsx_vsll_h(t2_1, shift_l); - __m128i t3_2 = __lsx_vsll_h(t2_2, shift_l); - __m128i t3_3 = __lsx_vsll_h(t2_3, shift_l); + __m256i t2_0 = __lasx_xvand_v(in0, v_3f03f0); + __m256i t2_1 = __lasx_xvand_v(in1, v_3f03f0); + __m256i t2_2 = __lasx_xvand_v(in2, v_3f03f0); + __m256i t2_3 = __lasx_xvand_v(in3, v_3f03f0); - __m128i input0 = __lsx_vor_v(t1_0, t3_0); - __m128i input0_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input0); - __m128i input0_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, - __lsx_vsub_b(input0, __lsx_vldi(32))); - __m128i input0_mask = __lsx_vslei_bu(input0, 31); - __m128i input0_result = - __lsx_vbitsel_v(input0_shuf1, input0_shuf0, input0_mask); - __lsx_vst(input0_result, reinterpret_cast<__m128i *>(out), 0); - out += 16; + __m256i t3_0 = __lasx_xvsll_h(t2_0, shift_l); + __m256i t3_1 = __lasx_xvsll_h(t2_1, shift_l); + __m256i t3_2 = __lasx_xvsll_h(t2_2, shift_l); + __m256i t3_3 = __lasx_xvsll_h(t2_3, shift_l); - __m128i input1 = __lsx_vor_v(t1_1, t3_1); - __m128i input1_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input1); - __m128i input1_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, - __lsx_vsub_b(input1, __lsx_vldi(32))); - __m128i input1_mask = __lsx_vslei_bu(input1, 31); - __m128i input1_result = - __lsx_vbitsel_v(input1_shuf1, input1_shuf0, input1_mask); - __lsx_vst(input1_result, reinterpret_cast<__m128i *>(out), 0); - out += 16; + __m256i input0 = __lasx_xvor_v(t1_0, t3_0); + __m256i input0_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input0); + __m256i input0_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input0, __lasx_xvldi(32))); + __m256i input0_mask = __lasx_xvslei_bu(input0, 31); + __m256i input0_result = + __lasx_xvbitsel_v(input0_shuf1, input0_shuf0, input0_mask); + __lasx_xvst(input0_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; - __m128i input2 = __lsx_vor_v(t1_2, t3_2); - __m128i input2_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input2); - __m128i input2_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, - __lsx_vsub_b(input2, __lsx_vldi(32))); - __m128i input2_mask = __lsx_vslei_bu(input2, 31); - __m128i input2_result = - __lsx_vbitsel_v(input2_shuf1, input2_shuf0, input2_mask); - __lsx_vst(input2_result, reinterpret_cast<__m128i *>(out), 0); - out += 16; + __m256i input1 = __lasx_xvor_v(t1_1, t3_1); + __m256i input1_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input1); + __m256i input1_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input1, __lasx_xvldi(32))); + __m256i input1_mask = __lasx_xvslei_bu(input1, 31); + __m256i input1_result = + __lasx_xvbitsel_v(input1_shuf1, input1_shuf0, input1_mask); + __lasx_xvst(input1_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; - __m128i input3 = __lsx_vor_v(t1_3, t3_3); - __m128i input3_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input3); - __m128i input3_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, - __lsx_vsub_b(input3, __lsx_vldi(32))); - __m128i input3_mask = __lsx_vslei_bu(input3, 31); - __m128i input3_result = - __lsx_vbitsel_v(input3_shuf1, input3_shuf0, input3_mask); - __lsx_vst(input3_result, reinterpret_cast<__m128i *>(out), 0); - out += 16; + __m256i input2 = __lasx_xvor_v(t1_2, t3_2); + __m256i input2_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input2); + __m256i input2_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input2, __lasx_xvldi(32))); + __m256i input2_mask = __lasx_xvslei_bu(input2, 31); + __m256i input2_result = + __lasx_xvbitsel_v(input2_shuf1, input2_shuf0, input2_mask); + __lasx_xvst(input2_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; + + __m256i input3 = __lasx_xvor_v(t1_3, t3_3); + __m256i input3_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input3); + __m256i input3_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input3, __lasx_xvldi(32))); + __m256i input3_mask = __lasx_xvslei_bu(input3, 31); + __m256i input3_result = + __lasx_xvbitsel_v(input3_shuf1, input3_shuf0, input3_mask); + __lasx_xvst(input3_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; } - for (; i + 16 <= srclen; i += 12) { + for (; i + 28 <= srclen; i += 24) { - __m128i in = __lsx_vld(reinterpret_cast(input + i), 0); + __m128i in_lo = __lsx_vld(reinterpret_cast(input + i), 0); + __m128i in_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); + + __m256i in = lasx_set_q(in_hi, in_lo); // bytes from groups A, B and C are needed in separate 32-bit lanes // in = [DDDD|CCCC|BBBB|AAAA] @@ -59716,33 +55154,32 @@ size_t encode_base64(char *dst, const char *src, size_t srclen, // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ // processed bits - in = __lsx_vshuf_b(in, in, (__m128i)shuf); + in = __lasx_xvshuf_b(in, in, (__m256i)shuf); // unpacking // t0 = [0000cccc|cc000000|aaaaaa00|00000000] - __m128i t0 = __lsx_vand_v(in, v_fc0fc00); + __m256i t0 = __lasx_xvand_v(in, v_fc0fc00); // t1 = [00000000|00cccccc|00000000|00aaaaaa] // ((c >> 6), (a >> 10)) - __m128i t1 = __lsx_vsrl_h(t0, shift_r); + __m256i t1 = __lasx_xvsrl_h(t0, shift_r); // t2 = [00000000|00dddddd|000000bb|bbbb0000] - __m128i t2 = __lsx_vand_v(in, v_3f03f0); + __m256i t2 = __lasx_xvand_v(in, v_3f03f0); // t3 = [00dddddd|00000000|00bbbbbb|00000000] // ((d << 8), (b << 4)) - __m128i t3 = __lsx_vsll_h(t2, shift_l); + __m256i t3 = __lasx_xvsll_h(t2, shift_l); // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 - __m128i indices = __lsx_vor_v(t1, t3); - - __m128i indices_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, indices); - __m128i indices_shuf1 = __lsx_vshuf_b( - base64_tbl3, base64_tbl2, __lsx_vsub_b(indices, __lsx_vldi(32))); - __m128i indices_mask = __lsx_vslei_bu(indices, 31); - __m128i indices_result = - __lsx_vbitsel_v(indices_shuf1, indices_shuf0, indices_mask); + __m256i indices = __lasx_xvor_v(t1, t3); - __lsx_vst(indices_result, reinterpret_cast<__m128i *>(out), 0); - out += 16; + __m256i indices_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, indices); + __m256i indices_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(indices, __lasx_xvldi(32))); + __m256i indices_mask = __lasx_xvslei_bu(indices, 31); + __m256i indices_result = + __lasx_xvbitsel_v(indices_shuf1, indices_shuf0, indices_mask); + __lasx_xvst(indices_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; } return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, @@ -59766,7 +55203,7 @@ static inline void compress(__m128i data, uint16_t mask, char *output) { tables::base64::thintable_epi8[mask2]}; // we increment by 0x08 the second half of the mask - v4u32 hi = {0, 0, 0x08080808, 0x08080808}; + const v4u32 hi = {0, 0, 0x08080808, 0x08080808}; __m128i shufmask1 = __lsx_vadd_b((__m128i)shufmask, (__m128i)hi); // this is the version "nearly pruned" @@ -59788,115 +55225,111 @@ static inline void compress(__m128i data, uint16_t mask, char *output) { } struct block64 { - __m128i chunks[4]; + __m256i chunks[2]; }; template -static inline uint16_t to_base64_mask(__m128i *src, bool *error) { - const v16u8 ascii_space_tbl = {0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0}; +static inline uint32_t to_base64_mask(__m256i *src, bool *error) { + __m256i ascii_space_tbl = + ____m256i((__m128i)v16u8{0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0}); // credit: aqrit - /* - '0'(0x30)-'9'(0x39) => delta_values_index = 4 - 'A'(0x41)-'Z'(0x5a) => delta_values_index = 4/5/12(4+8) - 'a'(0x61)-'z'(0x7a) => delta_values_index = 6/7/14(6+8) - '+'(0x2b) => delta_values_index = 3 - '/'(0x2f) => delta_values_index = 2+8 = 10 - '-'(0x2d) => delta_values_index = 2+8 = 10 - '_'(0x5f) => delta_values_index = 5+8 = 13 - */ - v16u8 delta_asso; + __m256i delta_asso; if (default_or_url) { - delta_asso = v16u8{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x16}; + delta_asso = + ____m256i((__m128i)v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x11, 0x0, 0x16}); } else { - delta_asso = v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x0, 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF}; + delta_asso = + ____m256i((__m128i)v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, + 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF}); } - v16i8 delta_values; + __m256i delta_values; if (default_or_url) { - delta_values = - v16i8{int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0xFF), int8_t(0x11), - int8_t(0xFF), int8_t(0xBF), int8_t(0x10), int8_t(0xB9)}; + delta_values = ____m256i( + (__m128i)v16i8{int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0xFF), int8_t(0x11), + int8_t(0xFF), int8_t(0xBF), int8_t(0x10), int8_t(0xB9)}); } else if (base64_url) { - delta_values = - v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x11), int8_t(0xC3), - int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0xB9)}; + delta_values = ____m256i( + (__m128i)v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x11), int8_t(0xC3), + int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0xB9)}); } else { - delta_values = - v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), - int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)}; + delta_values = ____m256i( + (__m128i)v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)}); } - v16u8 check_asso; + __m256i check_asso; if (default_or_url) { - check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0E, 0x0B, 0x06}; + check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, + 0x0B, 0x0E, 0x0B, 0x06}); + } else if (base64_url) { - check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x03, 0x07, 0x0B, 0x06, 0x0B, 0x12}; + check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, + 0x0B, 0x06, 0x0B, 0x12}); } else { - check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F}; + check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, + 0x0B, 0x0B, 0x0B, 0x0F}); } - v16i8 check_values; + __m256i check_values; if (default_or_url) { - check_values = - v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), - int8_t(0xB5), int8_t(0xA1), int8_t(0x00), int8_t(0x80), - int8_t(0x00), int8_t(0x80), int8_t(0x00), int8_t(0x80)}; + + check_values = ____m256i( + (__m128i)v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0xA1), int8_t(0x00), int8_t(0x80), + int8_t(0x00), int8_t(0x80), int8_t(0x00), int8_t(0x80)}); } else if (base64_url) { - check_values = v16i8{int8_t(0x0), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD3), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD0), int8_t(0x80), - int8_t(0xB0), int8_t(0x80), int8_t(0x0), int8_t(0x0)}; + check_values = ____m256i( + (__m128i)v16i8{int8_t(0x0), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD3), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD0), int8_t(0x80), + int8_t(0xB0), int8_t(0x80), int8_t(0x0), int8_t(0x0)}); } else { - check_values = - v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), - int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)}; + check_values = ____m256i( + (__m128i)v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), + int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)}); } - const __m128i shifted = __lsx_vsrli_b(*src, 3); - __m128i asso_index = __lsx_vand_v(*src, __lsx_vldi(0xF)); - const __m128i delta_hash = - __lsx_vavgr_bu(__lsx_vshuf_b((__m128i)delta_asso, (__m128i)delta_asso, - (__m128i)asso_index), - shifted); - const __m128i check_hash = - __lsx_vavgr_bu(__lsx_vshuf_b((__m128i)check_asso, (__m128i)check_asso, - (__m128i)asso_index), - shifted); + __m256i shifted = __lasx_xvsrli_b(*src, 3); + __m256i asso_index = __lasx_xvand_v(*src, __lasx_xvldi(0xF)); + __m256i delta_hash = __lasx_xvavgr_bu( + __lasx_xvshuf_b(delta_asso, delta_asso, asso_index), shifted); + __m256i check_hash = __lasx_xvavgr_bu( + __lasx_xvshuf_b(check_asso, check_asso, asso_index), shifted); - const __m128i out = - __lsx_vsadd_b(__lsx_vshuf_b((__m128i)delta_values, (__m128i)delta_values, - (__m128i)delta_hash), - *src); - const __m128i chk = - __lsx_vsadd_b(__lsx_vshuf_b((__m128i)check_values, (__m128i)check_values, - (__m128i)check_hash), - *src); - unsigned int mask = __lsx_vpickve2gr_hu(__lsx_vmskltz_b(chk), 0); + __m256i out = __lasx_xvsadd_b( + __lasx_xvshuf_b(delta_values, delta_values, delta_hash), *src); + __m256i chk = __lasx_xvsadd_b( + __lasx_xvshuf_b(check_values, check_values, check_hash), *src); + __m256i chk_ltz = __lasx_xvmskltz_b(chk); + unsigned int mask = __lasx_xvpickve2gr_wu(chk_ltz, 0); + mask = mask | (__lsx_vpickve2gr_hu(lasx_extracti128_hi(chk_ltz), 0) << 16); if (mask) { - __m128i ascii_space = __lsx_vseq_b(__lsx_vshuf_b((__m128i)ascii_space_tbl, - (__m128i)ascii_space_tbl, - (__m128i)asso_index), - *src); - *error |= - (mask != __lsx_vpickve2gr_hu(__lsx_vmskltz_b((__m128i)ascii_space), 0)); + __m256i ascii_space = __lasx_xvseq_b( + __lasx_xvshuf_b(ascii_space_tbl, ascii_space_tbl, asso_index), *src); + __m256i ascii_space_ltz = __lasx_xvmskltz_b(ascii_space); + unsigned int ascii_space_mask = __lasx_xvpickve2gr_wu(ascii_space_ltz, 0); + ascii_space_mask = + ascii_space_mask | + (__lsx_vpickve2gr_hu(lasx_extracti128_hi(ascii_space_ltz), 0) << 16); + *error |= (mask != ascii_space_mask); } *src = out; - return (uint16_t)mask; + return (uint32_t)mask; } template @@ -59906,18 +55339,12 @@ static inline uint64_t to_base64_mask(block64 *b, bool *error) { to_base64_mask(&b->chunks[0], error); uint64_t m1 = to_base64_mask(&b->chunks[1], error); - uint64_t m2 = - to_base64_mask(&b->chunks[2], error); - uint64_t m3 = - to_base64_mask(&b->chunks[3], error); - return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); + return m0 | (m1 << 32); } static inline void copy_block(block64 *b, char *output) { - __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output), 0); - __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output), 16); - __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output), 32); - __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output), 48); + __lasx_xvst(b->chunks[0], reinterpret_cast<__m256i *>(output), 0); + __lasx_xvst(b->chunks[1], reinterpret_cast<__m256i *>(output), 32); } static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { @@ -59925,84 +55352,150 @@ static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { uint64_t count = __lsx_vpickve2gr_d(__lsx_vpcnt_h(__lsx_vreplgr2vr_d(nmask)), 0); uint16_t *count_ptr = (uint16_t *)&count; - compress(b->chunks[0], uint16_t(mask), output); - compress(b->chunks[1], uint16_t(mask >> 16), output + count_ptr[0]); - compress(b->chunks[2], uint16_t(mask >> 32), + compress(lasx_extracti128_lo(b->chunks[0]), uint16_t(mask), output); + compress(lasx_extracti128_hi(b->chunks[0]), uint16_t(mask >> 16), + output + count_ptr[0]); + compress(lasx_extracti128_lo(b->chunks[1]), uint16_t(mask >> 32), output + count_ptr[0] + count_ptr[1]); - compress(b->chunks[3], uint16_t(mask >> 48), + compress(lasx_extracti128_hi(b->chunks[1]), uint16_t(mask >> 48), output + count_ptr[0] + count_ptr[1] + count_ptr[2]); return count_ones(nmask); } +template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } + +inline size_t compress_block_single(block64 *b, uint64_t mask, char *output) { + const size_t pos64 = trailing_zeroes(mask); + const int8_t pos = pos64 & 0xf; + + // Predefine the index vector + const v16u8 v1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + + switch (pos64 >> 4) { + case 0b00: { + const __m128i lane0 = lasx_extracti128_lo(b->chunks[0]); + const __m128i lane1 = lasx_extracti128_hi(b->chunks[0]); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); // v1 > v0 + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(lane0, lane0, sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 0 * 16), 0); + __lsx_vst(lane1, reinterpret_cast<__m128i *>(output + 1 * 16 - 1), 0); + __lasx_xvst(b->chunks[1], reinterpret_cast<__m256i *>(output + 2 * 16 - 1), + 0); + } break; + case 0b01: { + const __m128i lane0 = lasx_extracti128_lo(b->chunks[0]); + const __m128i lane1 = lasx_extracti128_hi(b->chunks[0]); + __lsx_vst(lane0, reinterpret_cast<__m128i *>(output + 0 * 16), 0); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(lane1, lane1, sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 1 * 16), 0); + __lasx_xvst(b->chunks[1], reinterpret_cast<__m256i *>(output + 2 * 16 - 1), + 0); + } break; + case 0b10: { + __lasx_xvst(b->chunks[0], reinterpret_cast<__m256i *>(output + 0 * 16), 0); + + const __m128i lane2 = lasx_extracti128_lo(b->chunks[1]); + const __m128i lane3 = lasx_extracti128_hi(b->chunks[1]); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(lane2, lane2, sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 2 * 16), 0); + __lsx_vst(lane3, reinterpret_cast<__m128i *>(output + 3 * 16 - 1), 0); + } break; + case 0b11: { + __lasx_xvst(b->chunks[0], reinterpret_cast<__m256i *>(output + 0 * 16), 0); + __lsx_vst(lasx_extracti128_lo(b->chunks[1]), + reinterpret_cast<__m128i *>(output + 2 * 16), 0); + + const __m128i lane3 = lasx_extracti128_hi(b->chunks[1]); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(lane3, lane3, sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 3 * 16), 0); + } break; + } + return 63; +} + // The caller of this function is responsible to ensure that there are 64 bytes // available from reading at src. The data is read into a block64 structure. static inline void load_block(block64 *b, const char *src) { - b->chunks[0] = __lsx_vld(reinterpret_cast(src), 0); - b->chunks[1] = __lsx_vld(reinterpret_cast(src), 16); - b->chunks[2] = __lsx_vld(reinterpret_cast(src), 32); - b->chunks[3] = __lsx_vld(reinterpret_cast(src), 48); + b->chunks[0] = __lasx_xvld(reinterpret_cast(src), 0); + b->chunks[1] = __lasx_xvld(reinterpret_cast(src), 32); } // The caller of this function is responsible to ensure that there are 128 bytes // available from reading at src. The data is read into a block64 structure. static inline void load_block(block64 *b, const char16_t *src) { - __m128i m1 = __lsx_vld(reinterpret_cast(src), 0); - __m128i m2 = __lsx_vld(reinterpret_cast(src), 16); - __m128i m3 = __lsx_vld(reinterpret_cast(src), 32); - __m128i m4 = __lsx_vld(reinterpret_cast(src), 48); - __m128i m5 = __lsx_vld(reinterpret_cast(src), 64); - __m128i m6 = __lsx_vld(reinterpret_cast(src), 80); - __m128i m7 = __lsx_vld(reinterpret_cast(src), 96); - __m128i m8 = __lsx_vld(reinterpret_cast(src), 112); - b->chunks[0] = __lsx_vssrlni_bu_h(m2, m1, 0); - b->chunks[1] = __lsx_vssrlni_bu_h(m4, m3, 0); - b->chunks[2] = __lsx_vssrlni_bu_h(m6, m5, 0); - b->chunks[3] = __lsx_vssrlni_bu_h(m8, m7, 0); + __m256i m1 = __lasx_xvld(reinterpret_cast(src), 0); + __m256i m2 = __lasx_xvld(reinterpret_cast(src), 32); + __m256i m3 = __lasx_xvld(reinterpret_cast(src), 64); + __m256i m4 = __lasx_xvld(reinterpret_cast(src), 96); + b->chunks[0] = __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(m2, m1, 0), 0b11011000); + b->chunks[1] = __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(m4, m3, 0), 0b11011000); } -static inline void base64_decode(char *out, __m128i str) { - __m128i t0 = __lsx_vor_v( - __lsx_vslli_w(str, 26), - __lsx_vslli_w(__lsx_vand_v(str, lsx_splat_u32(0x0000FF00)), 12)); - __m128i t1 = __lsx_vsrli_w(__lsx_vand_v(str, lsx_splat_u32(0x003F0000)), 2); - __m128i t2 = __lsx_vor_v(t0, t1); - __m128i t3 = __lsx_vor_v(t2, __lsx_vsrli_w(str, 16)); - const v16u8 pack_shuffle = {3, 2, 1, 7, 6, 5, 11, 10, - 9, 15, 14, 13, 0, 0, 0, 0}; - t3 = __lsx_vshuf_b(t3, t3, (__m128i)pack_shuffle); +static inline void base64_decode(char *out, __m256i str) { + __m256i t0 = __lasx_xvor_v( + __lasx_xvslli_w(str, 26), + __lasx_xvslli_w(__lasx_xvand_v(str, lasx_splat_u32(0x0000ff00)), 12)); + __m256i t1 = + __lasx_xvsrli_w(__lasx_xvand_v(str, lasx_splat_u32(0x003f0000)), 2); + __m256i t2 = __lasx_xvor_v(t0, t1); + __m256i t3 = __lasx_xvor_v(t2, __lasx_xvsrli_w(str, 16)); + __m256i pack_shuffle = ____m256i( + (__m128i)v16u8{3, 2, 1, 7, 6, 5, 11, 10, 9, 15, 14, 13, 0, 0, 0, 0}); + t3 = __lasx_xvshuf_b(t3, t3, (__m256i)pack_shuffle); // Store the output: - // we only need 12. - __lsx_vstelm_d(t3, out, 0, 0); - __lsx_vstelm_w(t3, out + 8, 0, 2); + __lsx_vst(lasx_extracti128_lo(t3), out, 0); + __lsx_vst(lasx_extracti128_hi(t3), out, 12); } // decode 64 bytes and output 48 bytes static inline void base64_decode_block(char *out, const char *src) { - base64_decode(out, __lsx_vld(reinterpret_cast(src), 0)); - base64_decode(out + 12, - __lsx_vld(reinterpret_cast(src), 16)); + base64_decode(out, __lasx_xvld(reinterpret_cast(src), 0)); base64_decode(out + 24, - __lsx_vld(reinterpret_cast(src), 32)); - base64_decode(out + 36, - __lsx_vld(reinterpret_cast(src), 48)); + __lasx_xvld(reinterpret_cast(src), 32)); } + static inline void base64_decode_block_safe(char *out, const char *src) { - base64_decode_block(out, src); + base64_decode(out, __lasx_xvld(reinterpret_cast(src), 0)); + alignas(32) char buffer[32]; + base64_decode(buffer, + __lasx_xvld(reinterpret_cast(src), 32)); + std::memcpy(out + 24, buffer, 24); } + static inline void base64_decode_block(char *out, block64 *b) { base64_decode(out, b->chunks[0]); - base64_decode(out + 12, b->chunks[1]); - base64_decode(out + 24, b->chunks[2]); - base64_decode(out + 36, b->chunks[3]); + base64_decode(out + 24, b->chunks[1]); } static inline void base64_decode_block_safe(char *out, block64 *b) { - base64_decode_block(out, b); + base64_decode(out, b->chunks[0]); + alignas(32) char buffer[32]; + base64_decode(buffer, b->chunks[1]); + std::memcpy(out + 24, buffer, 24); } template + typename chartype> full_result -compress_decode_base64(char *dst, const char_type *src, size_t srclen, +compress_decode_base64(char *dst, const chartype *src, size_t srclen, base64_options options, last_chunk_handling_options last_chunk_options) { const uint8_t *to_base64 = @@ -60020,15 +55513,19 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, } return {SUCCESS, full_input_length, 0}; } - const char_type *const srcinit = src; + char *end_of_safe_64byte_zone = + (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; + + const chartype *const srcinit = src; const char *const dstinit = dst; - const char_type *const srcend = src + srclen; + const chartype *const srcend = src + srclen; - constexpr size_t block_size = 10; + constexpr size_t block_size = 6; + static_assert(block_size >= 2, "block_size must be at least two"); char buffer[block_size * 64]; char *bufferptr = buffer; if (srclen >= 64) { - const char_type *const srcend64 = src + srclen - 64; + const chartype *const srcend64 = src + srclen - 64; while (src <= srcend64) { block64 b; load_block(&b, src); @@ -60036,48 +55533,56 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, bool error = false; uint64_t badcharmask = to_base64_mask(&b, &error); - if (badcharmask) { - if (error && !ignore_garbage) { - src -= 64; - while (src < srcend && scalar::base64::is_eight_byte(*src) && - to_base64[uint8_t(*src)] <= 64) { - src++; - } - if (src < srcend) { - // should never happen - } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; + if (error && !ignore_garbage) { + src -= 64; + while (src < srcend && scalar::base64::is_eight_byte(*src) && + to_base64[uint8_t(*src)] <= 64) { + src++; } + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } - if (badcharmask != 0) { - // optimization opportunity: check for simple masks like those made of - // continuous 1s followed by continuous 0s. And masks containing a - // single bad character. - bufferptr += compress_block(&b, badcharmask, bufferptr); - } else { - // optimization opportunity: if bufferptr == buffer and mask == 0, we - // can avoid the call to compress_block and decode directly. + if (is_power_of_two(badcharmask)) { + bufferptr += compress_block_single(&b, badcharmask, bufferptr); + } else { + bufferptr += compress_block(&b, badcharmask, bufferptr); + } + } else if (bufferptr != buffer) { copy_block(&b, bufferptr); bufferptr += 64; + } else { + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, &b); + } else { + base64_decode_block(dst, &b); + } + dst += 48; } if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 1); i++) { + for (size_t i = 0; i < (block_size - 2); i++) { base64_decode_block(dst, buffer + i * 64); dst += 48; } + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); + } else { + base64_decode_block(dst, buffer + (block_size - 2) * 64); + } + dst += 48; std::memcpy(buffer, buffer + (block_size - 1) * 64, 64); // 64 might be too much bufferptr -= (block_size - 1) * 64; } } } + char *buffer_start = buffer; // Optimization note: if this is almost full, then it is worth our // time, otherwise, we should just decode directly. int last_block = (int)((bufferptr - buffer_start) % 64); if (last_block != 0 && srcend - src + last_block >= 64) { + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { uint8_t val = to_base64[uint8_t(*src)]; *bufferptr = char(val); @@ -60092,7 +55597,11 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, } for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - base64_decode_block(dst, buffer_start); + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer_start); + } else { + base64_decode_block(dst, buffer_start); + } dst += 48; } if ((bufferptr - buffer_start) % 64 != 0) { @@ -60102,7 +55611,7 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) << 8; - // lsx is little-endian + // lasx is little-endian triple = scalar::u32_swap_bytes(triple); std::memcpy(dst, &triple, 4); @@ -60115,7 +55624,7 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) << 8; - // lsx is little-endian + // lasx is little-endian triple = scalar::u32_swap_bytes(triple); std::memcpy(dst, &triple, 3); @@ -60175,22 +55684,24 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, } return {SUCCESS, srclen, size_t(dst - dstinit)}; } -/* end file src/lsx/lsx_base64.cpp */ -/* begin file src/lsx/lsx_find.cpp */ +/* end file src/lasx/lasx_base64.cpp */ +/* begin file src/lasx/lasx_find.cpp */ simdutf_really_inline const char *util_find(const char *start, const char *end, char character) noexcept { if (start >= end) return end; - const int step = 16; - __m128i char_vec = __lsx_vreplgr2vr_b(static_cast(character)); + const int step = 32; + __m256i char_vec = __lasx_xvreplgr2vr_b(static_cast(character)); while (end - start >= step) { - __m128i data = __lsx_vld(reinterpret_cast(start), 0); - __m128i cmp = __lsx_vseq_b(data, char_vec); - if (__lsx_bnz_v(cmp)) { - uint16_t mask = - static_cast(__lsx_vpickve2gr_hu(__lsx_vmsknz_b(cmp), 0)); + __m256i data = __lasx_xvld(reinterpret_cast(start), 0); + __m256i cmp = __lasx_xvseq_b(data, char_vec); + if (__lasx_xbnz_v(cmp)) { + __m256i res = __lasx_xvmsknz_b(cmp); + uint32_t mask0 = __lasx_xvpickve2gr_wu(res, 0); + uint32_t mask1 = __lasx_xvpickve2gr_wu(res, 4); + uint32_t mask = (mask0 | (mask1 << 16)); return start + trailing_zeroes(mask); } @@ -60213,15 +55724,17 @@ simdutf_really_inline const char16_t *util_find(const char16_t *start, if (start >= end) return end; - const int step = 8; - __m128i char_vec = __lsx_vreplgr2vr_h(static_cast(character)); + const int step = 16; + __m256i char_vec = __lasx_xvreplgr2vr_h(static_cast(character)); while (end - start >= step) { - __m128i data = __lsx_vld(reinterpret_cast(start), 0); - __m128i cmp = __lsx_vseq_h(data, char_vec); - if (__lsx_bnz_v(cmp)) { - uint16_t mask = - static_cast(__lsx_vpickve2gr_hu(__lsx_vmsknz_b(cmp), 0)); + __m256i data = __lasx_xvld(reinterpret_cast(start), 0); + __m256i cmp = __lasx_xvseq_h(data, char_vec); + if (__lasx_xbnz_v(cmp)) { + __m256i res = __lasx_xvmsknz_b(cmp); + uint32_t mask0 = __lasx_xvpickve2gr_wu(res, 0); + uint32_t mask1 = __lasx_xvpickve2gr_wu(res, 4); + uint32_t mask = (mask0 | (mask1 << 16)); return start + trailing_zeroes(mask) / 2; } @@ -60237,16 +55750,16 @@ simdutf_really_inline const char16_t *util_find(const char16_t *start, return end; } -/* end file src/lsx/lsx_find.cpp */ +/* end file src/lasx/lasx_find.cpp */ #endif // SIMDUTF_FEATURE_BASE64 } // namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* begin file src/generic/buf_block_reader.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { // Walks through a buffer in block-sized increments, loading the last part with @@ -60352,13 +55865,13 @@ simdutf_really_inline void buf_block_reader::advance() { } } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/buf_block_reader.h */ #if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf8_validation { @@ -60578,12 +56091,12 @@ struct utf8_checker { using utf8_validation::utf8_checker; } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ /* begin file src/generic/utf8_validation/utf8_validator.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf8_validation { @@ -60664,14 +56177,14 @@ result generic_validate_utf8_with_errors(const char *input, size_t length) { } // namespace utf8_validation } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf8_validation/utf8_validator.h */ #endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING #if SIMDUTF_FEATURE_ASCII /* begin file src/generic/ascii_validation.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace ascii_validation { @@ -60718,7 +56231,7 @@ bool generic_validate_ascii(const char *input, size_t length) { } // namespace ascii_validation } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/ascii_validation.h */ #endif // SIMDUTF_FEATURE_ASCII @@ -60727,7 +56240,7 @@ bool generic_validate_ascii(const char *input, size_t length) { // transcoding from UTF-8 to Latin 1 /* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf8_to_latin1 { using namespace simd; @@ -61040,12 +56553,12 @@ struct validating_transcoder { }; // struct utf8_checker } // namespace utf8_to_latin1 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ /* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf8_to_latin1 { using namespace simd; @@ -61119,17 +56632,16 @@ simdutf_really_inline size_t convert_valid(const char *in, size_t size, } // namespace utf8_to_latin1 } // namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf // namespace simdutf /* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 // transcoding from UTF-8 to UTF-16 /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf8_to_utf16 { @@ -61200,12 +56712,12 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace utf8_to_utf16 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf8_to_utf16 { using namespace simd; @@ -61534,12 +57046,12 @@ struct validating_transcoder { }; // struct utf8_checker } // namespace utf8_to_utf16 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ /* begin file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf8 { @@ -61589,16 +57101,15 @@ simdutf_really_inline size_t utf16_length_from_utf8_bytemask(const char *in, } // namespace utf8 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 // transcoding from UTF-8 to UTF-32 /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf8_to_utf32 { @@ -61637,12 +57148,12 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace utf8_to_utf32 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf8_to_utf32 { using namespace simd; @@ -61957,7 +57468,7 @@ struct validating_transcoder { }; // struct utf8_checker } // namespace utf8_to_utf32 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 @@ -61965,7 +57476,7 @@ struct validating_transcoder { #if SIMDUTF_FEATURE_UTF8 /* begin file src/generic/utf8.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf8 { @@ -62054,7 +57565,7 @@ simdutf_really_inline size_t utf16_length_from_utf8(const char *in, } // namespace utf8 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf8.h */ #endif // SIMDUTF_FEATURE_UTF8 @@ -62062,7 +57573,7 @@ simdutf_really_inline size_t utf16_length_from_utf8(const char *in, #if SIMDUTF_FEATURE_UTF16 /* begin file src/generic/utf16/count_code_points_bytemask.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf16 { @@ -62077,11 +57588,11 @@ simdutf_really_inline size_t count_code_points(const char16_t *in, size_t pos = 0; size_t count = 0; - constexpr size_t max_itertions = 65535; + constexpr size_t max_iterations = 65535; const auto one = vector_u16::splat(1); const auto zero = vector_u16::zero(); - size_t itertion = 0; + size_t iteration = 0; auto counters = zero; for (; pos < size / N * N; pos += N) { @@ -62099,15 +57610,15 @@ simdutf_really_inline size_t count_code_points(const char16_t *in, counters += t2; - itertion += 1; - if (itertion == max_itertions) { + iteration += 1; + if (iteration == max_iterations) { count += counters.sum(); counters = zero; - itertion = 0; + iteration = 0; } } - if (itertion > 0) { + if (iteration > 0) { count += counters.sum(); } @@ -62117,12 +57628,12 @@ simdutf_really_inline size_t count_code_points(const char16_t *in, } // namespace utf16 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf16/count_code_points_bytemask.h */ /* begin file src/generic/utf16/change_endianness.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf16 { @@ -62143,12 +57654,12 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { } // namespace utf16 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf16/change_endianness.h */ /* begin file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf16 { @@ -62344,12 +57855,12 @@ utf8_length_from_utf16_with_replacement(const char16_t *in, size_t size) { } // namespace utf16 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ /* begin file src/generic/utf16/utf32_length_from_utf16.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf16 { @@ -62361,12 +57872,12 @@ simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, } // namespace utf16 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf16/utf32_length_from_utf16.h */ /* begin file src/generic/utf16/to_well_formed.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf16 { @@ -62456,7 +57967,7 @@ void to_well_formed(const char16_t *in, size_t n, char16_t *out) { } // namespace utf16 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf16/to_well_formed.h */ #endif // SIMDUTF_FEATURE_UTF16 @@ -62464,7 +57975,7 @@ void to_well_formed(const char16_t *in, size_t n, char16_t *out) { #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/generic/validate_utf16.h */ namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf16 { /* @@ -62625,7 +58136,7 @@ const result validate_utf16_as_ascii_with_errors(const char16_t *input, } // namespace utf16 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/validate_utf16.h */ #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING @@ -62635,7 +58146,7 @@ const result validate_utf16_as_ascii_with_errors(const char16_t *input, #include namespace simdutf { -namespace lsx { +namespace lasx { namespace { namespace utf32 { @@ -62766,7 +58277,7 @@ simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, } // namespace utf32 } // unnamed namespace -} // namespace lsx +} // namespace lasx } // namespace simdutf /* end file src/generic/utf32.h */ #endif // SIMDUTF_FEATURE_UTF32 @@ -62775,7 +58286,7 @@ simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, // Implementation-specific overrides // namespace simdutf { -namespace lsx { +namespace lasx { #if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int @@ -62809,33 +58320,33 @@ implementation::detect_encodings(const char *input, #if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return lsx::utf8_validation::generic_validate_utf8(buf, len); + return lasx::utf8_validation::generic_validate_utf8(buf, len); } #endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING #if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { - return lsx::utf8_validation::generic_validate_utf8_with_errors(buf, len); + return lasx::utf8_validation::generic_validate_utf8_with_errors(buf, len); } #endif // SIMDUTF_FEATURE_UTF8 #if SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return lsx::ascii_validation::generic_validate_ascii(buf, len); + return lasx::ascii_validation::generic_validate_ascii(buf, len); } simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { - return lsx::ascii_validation::generic_validate_ascii_with_errors(buf, len); + return lasx::ascii_validation::generic_validate_ascii_with_errors(buf, len); } #endif // SIMDUTF_FEATURE_ASCII #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept { - return lsx::utf16::validate_utf16_as_ascii_with_errors( + return lasx::utf16::validate_utf16_as_ascii_with_errors( buf, len) .error == SUCCESS; } @@ -62843,8 +58354,8 @@ implementation::validate_utf16le_as_ascii(const char16_t *buf, simdutf_warn_unused bool implementation::validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept { - return lsx::utf16::validate_utf16_as_ascii_with_errors(buf, - len) + return lasx::utf16::validate_utf16_as_ascii_with_errors(buf, + len) .error == SUCCESS; } #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII @@ -62857,8 +58368,7 @@ implementation::validate_utf16le(const char16_t *buf, return true; } const auto res = - lsx::utf16::validate_utf16_with_errors(buf, len); - + lasx::utf16::validate_utf16_with_errors(buf, len); if (res.is_err()) { return false; } @@ -62880,9 +58390,9 @@ implementation::validate_utf16be(const char16_t *buf, // empty input is valid. protected the implementation from nullptr. return true; } - const auto res = - lsx::utf16::validate_utf16_with_errors(buf, len); + const auto res = + lasx::utf16::validate_utf16_with_errors(buf, len); if (res.is_err()) { return false; } @@ -62901,7 +58411,7 @@ simdutf_warn_unused result implementation::validate_utf16le_with_errors( return result(error_code::SUCCESS, 0); } const result res = - lsx::utf16::validate_utf16_with_errors(buf, len); + lasx::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { const result scalar_res = scalar::utf16::validate_with_errors( @@ -62918,7 +58428,7 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( return result(error_code::SUCCESS, 0); } const result res = - lsx::utf16::validate_utf16_with_errors(buf, len); + lasx::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { const result scalar_res = scalar::utf16::validate_with_errors(buf + res.count, @@ -62931,12 +58441,12 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, char16_t *output) const noexcept { - utf16::to_well_formed(input, len, output); + return utf16::to_well_formed(input, len, output); } void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept { - utf16::to_well_formed(input, len, output); + return utf16::to_well_formed(input, len, output); } #endif // SIMDUTF_FEATURE_UTF16 @@ -62947,7 +58457,7 @@ implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { // empty input is valid. protected the implementation from nullptr. return true; } - const char32_t *tail = lsx_validate_utf32le(buf, len); + const char32_t *tail = lasx_validate_utf32le(buf, len); if (tail) { return scalar::utf32::validate(tail, len - (tail - buf)); } else { @@ -62962,7 +58472,7 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( if (simdutf_unlikely(len == 0)) { return result(error_code::SUCCESS, 0); } - result res = lsx_validate_utf32le_with_errors(buf, len); + result res = lasx_validate_utf32le_with_errors(buf, len); if (res.count != len) { result scalar_res = scalar::utf32::validate_with_errors(buf + res.count, len - res.count); @@ -62977,7 +58487,7 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = - lsx_convert_latin1_to_utf8(buf, len, utf8_output); + lasx_convert_latin1_to_utf8(buf, len, utf8_output); size_t converted_chars = ret.second - utf8_output; if (ret.first != buf + len) { @@ -62993,7 +58503,7 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - lsx_convert_latin1_to_utf16le(buf, len, utf16_output); + lasx_convert_latin1_to_utf16le(buf, len, utf16_output); size_t converted_chars = ret.second - utf16_output; if (ret.first != buf + len) { const size_t scalar_converted_chars = @@ -63007,7 +58517,7 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - lsx_convert_latin1_to_utf16be(buf, len, utf16_output); + lasx_convert_latin1_to_utf16be(buf, len, utf16_output); size_t converted_chars = ret.second - utf16_output; if (ret.first != buf + len) { const size_t scalar_converted_chars = @@ -63023,7 +58533,7 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - lsx_convert_latin1_to_utf32(buf, len, utf32_output); + lasx_convert_latin1_to_utf32(buf, len, utf32_output); size_t converted_chars = ret.second - utf32_output; if (ret.first != buf + len) { const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( @@ -63037,19 +58547,117 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { + size_t pos = 0; + char *output_start{latin1_output}; + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)latin1_output & 0x1F) && pos < len) { + if (buf[pos] & 0x80) { + if (pos + 1 >= len) + return 0; + if ((buf[pos] & 0b11100000) == 0b11000000) { + if ((buf[pos + 1] & 0b11000000) != 0b10000000) + return 0; + uint32_t code_point = + (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0xFF < code_point) { + return 0; + } + *latin1_output++ = char(code_point); + pos += 2; + } else { + return 0; + } + } else { + *latin1_output++ = char(buf[pos]); + pos++; + } + } + size_t convert_size = latin1_output - output_start; + if (pos == len) + return convert_size; utf8_to_latin1::validating_transcoder converter; - return converter.convert(buf, len, latin1_output); + size_t convert_result = + converter.convert(buf + pos, len - pos, latin1_output); + return convert_result ? convert_size + convert_result : 0; } simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( const char *buf, size_t len, char *latin1_output) const noexcept { + size_t pos = 0; + char *output_start{latin1_output}; + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)latin1_output & 0x1F) && pos < len) { + if (buf[pos] & 0x80) { + if ((buf[pos] & 0b11100000) == 0b11000000) { + if (pos + 1 >= len) + return result(error_code::TOO_SHORT, pos); + if ((buf[pos + 1] & 0b11000000) != 0b10000000) + return result(error_code::TOO_SHORT, pos); + uint32_t code_point = + (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); + if (code_point < 0x80) + return result(error_code::OVERLONG, pos); + if (0xFF < code_point) + return result(error_code::TOO_LARGE, pos); + *latin1_output++ = char(code_point); + pos += 2; + } else if ((buf[pos] & 0b11110000) == 0b11100000) { + return result(error_code::TOO_LARGE, pos); + } else if ((buf[pos] & 0b11111000) == 0b11110000) { + return result(error_code::TOO_LARGE, pos); + } else { + if ((buf[pos] & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } + return result(error_code::HEADER_BITS, pos); + } + } else { + *latin1_output++ = char(buf[pos]); + pos++; + } + } + size_t convert_size = latin1_output - output_start; + if (pos == len) + return result(error_code::SUCCESS, convert_size); + utf8_to_latin1::validating_transcoder converter; - return converter.convert_with_errors(buf, len, latin1_output); + result res = + converter.convert_with_errors(buf + pos, len - pos, latin1_output); + return res.error ? result(res.error, res.count + pos) + : result(res.error, res.count + convert_size); } simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { - return lsx::utf8_to_latin1::convert_valid(buf, len, latin1_output); + size_t pos = 0; + char *output_start{latin1_output}; + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)latin1_output & 0x1F) && pos < len) { + if (buf[pos] & 0x80) { + if (pos + 1 >= len) + break; + if ((buf[pos] & 0b11100000) == 0b11000000) { + if ((buf[pos + 1] & 0b11000000) != 0b10000000) + return 0; + uint32_t code_point = + (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); + *latin1_output++ = char(code_point); + pos += 2; + } else { + return 0; + } + } else { + *latin1_output++ = char(buf[pos]); + pos++; + } + } + size_t convert_size = latin1_output - output_start; + if (pos == len) + return convert_size; + + size_t convert_result = + lasx::utf8_to_latin1::convert_valid(buf + pos, len - pos, latin1_output); + return convert_result ? convert_size + convert_result : 0; } #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 @@ -63115,7 +58723,7 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lsx_convert_utf16_to_latin1(buf, len, latin1_output); + lasx_convert_utf16_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } @@ -63136,7 +58744,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lsx_convert_utf16_to_latin1(buf, len, latin1_output); + lasx_convert_utf16_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } @@ -63158,7 +58766,7 @@ simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lsx_convert_utf16_to_latin1_with_errors( + lasx_convert_utf16_to_latin1_with_errors( buf, len, latin1_output); if (ret.first.error) { return ret.first; @@ -63185,8 +58793,8 @@ simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lsx_convert_utf16_to_latin1_with_errors(buf, len, - latin1_output); + lasx_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -63225,7 +58833,7 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = - lsx_convert_utf16_to_utf8(buf, len, utf8_output); + lasx_convert_utf16_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -63245,7 +58853,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = - lsx_convert_utf16_to_utf8(buf, len, utf8_output); + lasx_convert_utf16_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -63267,8 +58875,8 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lsx_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); + lasx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -63295,8 +58903,8 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lsx_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); + lasx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -63336,7 +58944,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( return 0; } std::pair ret = - lsx_convert_utf32_to_utf8(buf, len, utf8_output); + lasx_convert_utf32_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -63360,7 +58968,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lsx_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + lasx_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_utf8::convert_with_errors( buf + ret.first.count, len - ret.first.count, ret.second); @@ -63382,7 +58990,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - lsx_convert_utf16_to_utf32(buf, len, utf32_output); + lasx_convert_utf16_to_utf32(buf, len, utf32_output); if (ret.first == nullptr) { return 0; } @@ -63402,7 +59010,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - lsx_convert_utf16_to_utf32(buf, len, utf32_output); + lasx_convert_utf16_to_utf32(buf, len, utf32_output); if (ret.first == nullptr) { return 0; } @@ -63424,8 +59032,8 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lsx_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); + lasx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -63452,8 +59060,8 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lsx_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); + lasx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -63480,7 +59088,7 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lsx_convert_utf32_to_latin1(buf, len, latin1_output); + lasx_convert_utf32_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } @@ -63500,7 +59108,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lsx_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + lasx_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -63524,7 +59132,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lsx_convert_utf32_to_latin1(buf, len, latin1_output); + lasx_convert_utf32_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } @@ -63551,7 +59159,7 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - lsx_convert_utf32_to_utf16(buf, len, utf16_output); + lasx_convert_utf32_to_utf16(buf, len, utf16_output); if (ret.first == nullptr) { return 0; } @@ -63572,7 +59180,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - lsx_convert_utf32_to_utf16(buf, len, utf16_output); + lasx_convert_utf32_to_utf16(buf, len, utf16_output); if (ret.first == nullptr) { return 0; } @@ -63594,8 +59202,8 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lsx_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); + lasx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_utf16::convert_with_errors( @@ -63618,8 +59226,8 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lsx_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); + lasx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_utf16::convert_with_errors( @@ -63679,7 +59287,23 @@ simdutf_warn_unused size_t implementation::count_utf16be( #if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t implementation::count_utf8(const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); + size_t pos = 0; + size_t count = 0; + // Performance degradation when memory address is not 32-byte aligned + while ((((uint64_t)input + pos) & 0x1F && pos < length)) { + if (input[pos++] > -65) { + count++; + } + } + __m256i v_bf = __lasx_xvldi(0xBF); // 0b10111111 + for (; pos + 32 <= length; pos += 32) { + __m256i in = __lasx_xvld(reinterpret_cast(input + pos), 0); + __m256i utf8_count = + __lasx_xvpcnt_h(__lasx_xvmskltz_b(__lasx_xvslt_b(v_bf, in))); + count = count + __lasx_xvpickve2gr_wu(utf8_count, 0) + + __lasx_xvpickve2gr_wu(utf8_count, 4); + } + return count + scalar::utf8::count_code_points(input + pos, length - pos); } #endif // SIMDUTF_FEATURE_UTF8 @@ -63765,12 +59389,12 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf32( #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { - const __m128i v_ffff = lsx_splat_u32(0x0000ffff); + __m128i v_ffff = lsx_splat_u32(0x0000ffff); size_t pos = 0; size_t count = 0; for (; pos + 4 <= length; pos += 4) { __m128i in = __lsx_vld(reinterpret_cast(input + pos), 0); - const __m128i surrogate_bytemask = __lsx_vslt_wu(v_ffff, in); + __m128i surrogate_bytemask = __lsx_vslt_wu(v_ffff, in); size_t surrogate_count = __lsx_vpickve2gr_bu( __lsx_vpcnt_b(__lsx_vmskltz_w(surrogate_bytemask)), 0); count += 4 + surrogate_count; @@ -63936,33 +59560,39 @@ const char16_t *implementation::find(const char16_t *start, const char16_t *end, } #endif // SIMDUTF_FEATURE_BASE64 -} // namespace lsx +} // namespace lasx } // namespace simdutf -/* begin file src/simdutf/lsx/end.h */ +/* begin file src/simdutf/lasx/end.h */ #undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP -/* end file src/simdutf/lsx/end.h */ -/* end file src/lsx/implementation.cpp */ + +#if SIMDUTF_CAN_ALWAYS_RUN_LASX +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION #endif -#if SIMDUTF_IMPLEMENTATION_LASX -/* begin file src/lasx/implementation.cpp */ -/* begin file src/simdutf/lasx/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "lasx" -// #define SIMDUTF_IMPLEMENTATION lasx +/* end file src/simdutf/lasx/end.h */ +/* end file src/lasx/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_LSX +/* begin file src/lsx/implementation.cpp */ +/* begin file src/simdutf/lsx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lsx" +// #define SIMDUTF_IMPLEMENTATION lsx #define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 -/* end file src/simdutf/lasx/begin.h */ +/* end file src/simdutf/lsx/begin.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { -#ifndef SIMDUTF_LASX_H - #error "lasx.h must be included" +#ifndef SIMDUTF_LSX_H + #error "lsx.h must be included" #endif using namespace simd; #if SIMDUTF_FEATURE_UTF8 // convert vmskltz/vmskgez/vmsknz to // simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes index -const uint8_t lasx_1_2_utf8_bytes_mask[] = { +const uint8_t lsx_1_2_utf8_bytes_mask[] = { 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, @@ -63987,9 +59617,6 @@ const uint8_t lasx_1_2_utf8_bytes_mask[] = { simdutf_really_inline __m128i lsx_swap_bytes(__m128i vec) { return __lsx_vshuf4i_b(vec, 0b10110001); } -simdutf_really_inline __m256i lasx_swap_bytes(__m256i vec) { - return __lasx_xvshuf4i_b(vec, 0b10110001); -} #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ @@ -64060,7 +59687,7 @@ convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_h(0x7f)); // 6 or 7 bits // 1 byte: 00000000 00000000 // 2 byte: 00000aaa aa000000 - __m128i v1f00 = lsx_splat_u16(0x1f00); + const __m128i v1f00 = lsx_splat_u16(0x1f00); __m128i composed = __lsx_vsrli_h(__lsx_vand_v(perm, v1f00), 2); // 5 bits // Combine with a shift right accumulate // 1 byte: 00000000 0bbbbbbb @@ -64072,7 +59699,7 @@ convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { // SIMDUTF_FEATURE_UTF32) #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/lasx/lasx_validate_utf16.cpp */ +/* begin file src/lsx/lsx_validate_utf16.cpp */ template simd8 utf16_gather_high_bytes(const simd16 in0, const simd16 in1) { @@ -64086,117 +59713,101 @@ simd8 utf16_gather_high_bytes(const simd16 in0, return simd16::pack_shifted_right<8>(in0, in1); } } -/* end file src/lasx/lasx_validate_utf16.cpp */ +/* end file src/lsx/lsx_validate_utf16.cpp */ #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING #if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/lasx/lasx_validate_utf32le.cpp */ -const char32_t *lasx_validate_utf32le(const char32_t *input, size_t size) { +/* begin file src/lsx/lsx_validate_utf32le.cpp */ +const char32_t *lsx_validate_utf32le(const char32_t *input, size_t size) { const char32_t *end = input + size; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)input & 0x1F) && input < end) { - uint32_t word = *input++; - if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { - return nullptr; - } - } - - __m256i offset = lasx_splat_u32(0xffff2000); - __m256i standardoffsetmax = lasx_splat_u32(0xfffff7ff); - __m256i standardmax = lasx_splat_u32(0x10ffff); - __m256i currentmax = __lasx_xvldi(0x0); - __m256i currentoffsetmax = __lasx_xvldi(0x0); + __m128i offset = lsx_splat_u32(0xffff2000); + __m128i standardoffsetmax = lsx_splat_u32(0xfffff7ff); + __m128i standardmax = lsx_splat_u32(0x10ffff); + __m128i currentmax = lsx_splat_u32(0); + __m128i currentoffsetmax = lsx_splat_u32(0); - while (input + 8 < end) { - __m256i in = __lasx_xvld(reinterpret_cast(input), 0); - currentmax = __lasx_xvmax_wu(in, currentmax); + while (input + 4 < end) { + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + currentmax = __lsx_vmax_wu(in, currentmax); // 0xD8__ + 0x2000 = 0xF8__ => 0xF8__ > 0xF7FF currentoffsetmax = - __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax); - input += 8; + __lsx_vmax_wu(__lsx_vadd_w(in, offset), currentoffsetmax); + + input += 4; } - __m256i is_zero = - __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax); - if (__lasx_xbnz_v(is_zero)) { + + __m128i is_zero = + __lsx_vxor_v(__lsx_vmax_wu(currentmax, standardmax), standardmax); + if (__lsx_bnz_v(is_zero)) { return nullptr; } - is_zero = __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (__lasx_xbnz_v(is_zero)) { + is_zero = __lsx_vxor_v(__lsx_vmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lsx_bnz_v(is_zero)) { return nullptr; } + return input; } -const result lasx_validate_utf32le_with_errors(const char32_t *input, - size_t size) { +const result lsx_validate_utf32le_with_errors(const char32_t *input, + size_t size) { const char32_t *start = input; const char32_t *end = input + size; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)input & 0x1F) && input < end) { - uint32_t word = *input; - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, input - start); - } - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, input - start); - } - input++; - } - - __m256i offset = lasx_splat_u32(0xffff2000); - __m256i standardoffsetmax = lasx_splat_u32(0xfffff7ff); - __m256i standardmax = lasx_splat_u32(0x10ffff); - __m256i currentmax = __lasx_xvldi(0x0); - __m256i currentoffsetmax = __lasx_xvldi(0x0); + __m128i offset = lsx_splat_u32(0xffff2000); + __m128i standardoffsetmax = lsx_splat_u32(0xfffff7ff); + __m128i standardmax = lsx_splat_u32(0x10ffff); + __m128i currentmax = lsx_splat_u32(0); + __m128i currentoffsetmax = lsx_splat_u32(0); - while (input + 8 < end) { - __m256i in = __lasx_xvld(reinterpret_cast(input), 0); - currentmax = __lasx_xvmax_wu(in, currentmax); + while (input + 4 < end) { + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + currentmax = __lsx_vmax_wu(in, currentmax); currentoffsetmax = - __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax); + __lsx_vmax_wu(__lsx_vadd_w(in, offset), currentoffsetmax); - __m256i is_zero = - __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax); - if (__lasx_xbnz_v(is_zero)) { + __m128i is_zero = + __lsx_vxor_v(__lsx_vmax_wu(currentmax, standardmax), standardmax); + if (__lsx_bnz_v(is_zero)) { return result(error_code::TOO_LARGE, input - start); } - is_zero = - __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (__lasx_xbnz_v(is_zero)) { + + is_zero = __lsx_vxor_v(__lsx_vmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lsx_bnz_v(is_zero)) { return result(error_code::SURROGATE, input - start); } - input += 8; + + input += 4; } return result(error_code::SUCCESS, input - start); } -/* end file src/lasx/lasx_validate_utf32le.cpp */ +/* end file src/lsx/lsx_validate_utf32le.cpp */ #endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_latin1_to_utf8.cpp */ +/* begin file src/lsx/lsx_convert_latin1_to_utf8.cpp */ /* Returns a pair: the first unprocessed byte from buf and utf8_output A scalar routing should carry on the conversion of the tail. */ std::pair -lasx_convert_latin1_to_utf8(const char *latin1_input, size_t len, - char *utf8_out) { +lsx_convert_latin1_to_utf8(const char *latin1_input, size_t len, + char *utf8_out) { uint8_t *utf8_output = reinterpret_cast(utf8_out); - const size_t safety_margin = 12; const char *end = latin1_input + len; + __m128i zero = __lsx_vldi(0); // We always write 16 bytes, of which more than the first 8 bytes // are valid. A safety margin of 8 is more than sufficient. - while (end - latin1_input >= std::ptrdiff_t(16 + safety_margin)) { + while (end - latin1_input >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(latin1_input), 0); - uint32_t ascii_mask = __lsx_vpickve2gr_wu(__lsx_vmskgez_b(in8), 0); - if (ascii_mask == 0xFFFF) { + uint32_t ascii = __lsx_vpickve2gr_hu(__lsx_vmskgez_b(in8), 0); + if (ascii == 0xffff) { // ASCII fast path!!!! __lsx_vst(in8, utf8_output, 0); utf8_output += 16; latin1_input += 16; @@ -64204,76 +59815,48 @@ lasx_convert_latin1_to_utf8(const char *latin1_input, size_t len, } // We just fallback on UTF-16 code. This could be optimized/simplified // further. - __m256i in16 = __lasx_vext2xv_hu_bu(____m256i(in8)); + __m128i in16 = __lsx_vilvl_b(zero, in8); // 1. prepare 2-byte values - // input 8-bit word : [aabb|bbbb] x 16 - // expected output : [1100|00aa|10bb|bbbb] x 16 + // input 8-bit word : [aabb|bbbb] x 8 + // expected output : [1100|00aa|10bb|bbbb] x 8 // t0 = [0000|00aa|bbbb|bb00] - __m256i t0 = __lasx_xvslli_h(in16, 2); + __m128i t0 = __lsx_vslli_h(in16, 2); // t1 = [0000|00aa|0000|0000] - __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x300)); + __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x300)); // t3 = [0000|00aa|00bb|bbbb] - __m256i t2 = __lasx_xvbitsel_v(t1, in16, __lasx_xvrepli_h(0x3f)); + __m128i t2 = __lsx_vbitsel_v(t1, in16, __lsx_vrepli_h(0x3f)); // t4 = [1100|00aa|10bb|bbbb] - __m256i t3 = __lasx_xvor_v(t2, __lasx_xvreplgr2vr_h(uint16_t(0xc080))); + __m128i t3 = __lsx_vor_v(t2, __lsx_vreplgr2vr_h(uint16_t(0xc080))); // merge ASCII and 2-byte codewords - __m256i one_byte_bytemask = __lasx_xvsle_hu(in16, __lasx_xvrepli_h(0x7F)); - __m256i utf8_unpacked = __lasx_xvbitsel_v(t3, in16, one_byte_bytemask); + __m128i one_byte_bytemask = __lsx_vsle_hu(in16, __lsx_vrepli_h(0x7F)); + __m128i utf8_unpacked = __lsx_vbitsel_v(t3, in16, one_byte_bytemask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[(ascii_mask & 0xFF)]][0]; - __m128i shuffle0 = __lsx_vld(row0 + 1, 0); - __m128i utf8_unpacked_lo = lasx_extracti128_lo(utf8_unpacked); - __m128i utf8_packed0 = - __lsx_vshuf_b(utf8_unpacked_lo, utf8_unpacked_lo, shuffle0); - __lsx_vst(utf8_packed0, utf8_output, 0); - utf8_output += row0[0]; + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[(ascii & 0xff)]][0]; + __m128i shuffle = __lsx_vld(row + 1, 0); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); - const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[(ascii_mask >> 8)]][0]; - __m128i shuffle1 = __lsx_vld(row1 + 1, 0); - __m128i utf8_unpacked_hi = lasx_extracti128_hi(utf8_unpacked); - __m128i utf8_packed1 = - __lsx_vshuf_b(utf8_unpacked_hi, utf8_unpacked_hi, shuffle1); - __lsx_vst(utf8_packed1, utf8_output, 0); - utf8_output += row1[0]; + // store bytes + __lsx_vst(utf8_packed, utf8_output, 0); + // adjust pointers + latin1_input += 8; + utf8_output += row[0]; - latin1_input += 16; } // while return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); } -/* end file src/lasx/lasx_convert_latin1_to_utf8.cpp */ +/* end file src/lsx/lsx_convert_latin1_to_utf8.cpp */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_latin1_to_utf16.cpp */ +/* begin file src/lsx/lsx_convert_latin1_to_utf16.cpp */ std::pair -lasx_convert_latin1_to_utf16le(const char *buf, size_t len, - char16_t *utf16_output) { +lsx_convert_latin1_to_utf16le(const char *buf, size_t len, + char16_t *utf16_output) { const char *end = buf + len; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)utf16_output & 0x1F) && buf < end) { - *utf16_output++ = uint8_t(*buf) & 0xFF; - buf++; - } - - while (end - buf >= 32) { - __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); - - __m256i inlow = __lasx_vext2xv_hu_bu(in8); - __m256i in8_high = __lasx_xvpermi_q(in8, in8, 0b00000001); - __m256i inhigh = __lasx_vext2xv_hu_bu(in8_high); - __lasx_xvst(inlow, reinterpret_cast(utf16_output), 0); - __lasx_xvst(inhigh, reinterpret_cast(utf16_output), 32); - - utf16_output += 32; - buf += 32; - } - - if (end - buf >= 16) { - __m128i zero = __lsx_vldi(0); + __m128i zero = __lsx_vldi(0); + while (end - buf >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); __m128i inlow = __lsx_vilvl_b(zero, in8); @@ -64284,38 +59867,20 @@ lasx_convert_latin1_to_utf16le(const char *buf, size_t len, utf16_output += 16; buf += 16; } + return std::make_pair(buf, utf16_output); } std::pair -lasx_convert_latin1_to_utf16be(const char *buf, size_t len, - char16_t *utf16_output) { +lsx_convert_latin1_to_utf16be(const char *buf, size_t len, + char16_t *utf16_output) { const char *end = buf + len; - - while (((uint64_t)utf16_output & 0x1F) && buf < end) { - *utf16_output++ = (uint16_t(*buf++) << 8); - } - - __m256i zero = __lasx_xvldi(0); - while (end - buf >= 32) { - __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); - - __m256i in8_shuf = __lasx_xvpermi_d(in8, 0b11011000); - - __m256i inlow = __lasx_xvilvl_b(in8_shuf, zero); - __m256i inhigh = __lasx_xvilvh_b(in8_shuf, zero); - __lasx_xvst(inlow, reinterpret_cast(utf16_output), 0); - __lasx_xvst(inhigh, reinterpret_cast(utf16_output), 32); - utf16_output += 32; - buf += 32; - } - - if (end - buf >= 16) { - __m128i zero_128 = __lsx_vldi(0); + __m128i zero = __lsx_vldi(0); + while (end - buf >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); - __m128i inlow = __lsx_vilvl_b(in8, zero_128); - __m128i inhigh = __lsx_vilvh_b(in8, zero_128); + __m128i inlow = __lsx_vilvl_b(in8, zero); + __m128i inhigh = __lsx_vilvh_b(in8, zero); __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); utf16_output += 16; @@ -64324,44 +59889,16 @@ lasx_convert_latin1_to_utf16be(const char *buf, size_t len, return std::make_pair(buf, utf16_output); } -/* end file src/lasx/lasx_convert_latin1_to_utf16.cpp */ +/* end file src/lsx/lsx_convert_latin1_to_utf16.cpp */ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_latin1_to_utf32.cpp */ +/* begin file src/lsx/lsx_convert_latin1_to_utf32.cpp */ std::pair -lasx_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { +lsx_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { const char *end = buf + len; - // LASX requires 32-byte alignment, otherwise performance will be degraded - while (((uint64_t)utf32_output & 0x1F) && buf < end) { - *utf32_output++ = ((uint32_t)*buf) & 0xFF; - buf++; - } - - while (end - buf >= 32) { - __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); - - __m256i in32_0 = __lasx_vext2xv_wu_bu(in8); - __lasx_xvst(in32_0, reinterpret_cast(utf32_output), 0); - - __m256i in8_1 = __lasx_xvpermi_d(in8, 0b00000001); - __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); - __lasx_xvst(in32_1, reinterpret_cast(utf32_output), 32); - - __m256i in8_2 = __lasx_xvpermi_d(in8, 0b00000010); - __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); - __lasx_xvst(in32_2, reinterpret_cast(utf32_output), 64); - - __m256i in8_3 = __lasx_xvpermi_d(in8, 0b00000011); - __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); - __lasx_xvst(in32_3, reinterpret_cast(utf32_output), 96); - - utf32_output += 32; - buf += 32; - } - - if (end - buf >= 16) { + while (end - buf >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); __m128i zero = __lsx_vldi(0); @@ -64373,9 +59910,9 @@ lasx_convert_latin1_to_utf32(const char *buf, size_t len, __m128i in32_3 = __lsx_vilvh_h(zero, in16high); __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); - __lsx_vst(in32_1, reinterpret_cast(utf32_output), 16); - __lsx_vst(in32_2, reinterpret_cast(utf32_output), 32); - __lsx_vst(in32_3, reinterpret_cast(utf32_output), 48); + __lsx_vst(in32_1, reinterpret_cast(utf32_output + 4), 0); + __lsx_vst(in32_2, reinterpret_cast(utf32_output + 8), 0); + __lsx_vst(in32_3, reinterpret_cast(utf32_output + 12), 0); utf32_output += 16; buf += 16; @@ -64383,11 +59920,11 @@ lasx_convert_latin1_to_utf32(const char *buf, size_t len, return std::make_pair(buf, utf32_output); } -/* end file src/lasx/lasx_convert_latin1_to_utf32.cpp */ +/* end file src/lsx/lsx_convert_latin1_to_utf32.cpp */ #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/lasx/lasx_convert_utf8_to_utf16.cpp */ +/* begin file src/lsx/lsx_convert_utf8_to_utf16.cpp */ // Convert up to 16 bytes from utf8 to utf16 using a mask indicating the // end of the code points. Only the least significant 12 bits of the mask // are accessed. @@ -64412,22 +59949,15 @@ size_t convert_masked_utf8_to_utf16(const char *input, // We first try a few fast paths. // The obvious first test is ASCII, which actually consumes the full 16. if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { - __m128i zero = __lsx_vldi(0); - if simdutf_constexpr (match_system(big_endian)) { - __lsx_vst(__lsx_vilvl_b(zero, in), - reinterpret_cast(utf16_output), 0); - __lsx_vst(__lsx_vilvh_b(zero, in), - reinterpret_cast(utf16_output), 16); - } else { - __lsx_vst(__lsx_vilvl_b(in, zero), - reinterpret_cast(utf16_output), 0); - __lsx_vst(__lsx_vilvh_b(in, zero), - reinterpret_cast(utf16_output), 16); - } + // We process in chunks of 16 bytes + // The routine in simd.h is reused. + simd8 temp{in}; + temp.store_ascii_as_utf16(utf16_output); utf16_output += 16; // We wrote 16 16-bit characters. return 16; // We consumed 16 bytes. } + uint64_t buffer[2]; // 3 byte sequences are the next most common, as seen in CJK, which has long // sequences of these. if (input_utf8_end_of_code_point_mask == 0x924) { @@ -64455,8 +59985,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, } __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 8; // We wrote 6 16-bit characters. - return 16; // We consumed 12 bytes. + utf16_output += 6; // We wrote 6 16-bit characters. + return 12; // We consumed 12 bytes. } /// We do not have a fast path available, or the fast path is unimportant, so @@ -64528,6 +60058,13 @@ size_t convert_masked_utf8_to_utf16(const char *input, } else if (idx < 209) { // THREE (3) input code-code units if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but + // it is easier when we can assume they are all pairs. This version does + // not use the LUT, but 4 byte sequences are less common and the overhead + // of the extra memory access is less important than the early branch + // overhead in shorter sequences. + __m128i expected_mask = (__m128i)v16u8{0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, 0x0, 0x0, 0x0, 0x0}; @@ -64537,13 +60074,6 @@ size_t convert_masked_utf8_to_utf16(const char *input, __m128i check = __lsx_vseq_b(__lsx_vand_v(in, expected_mask), expected); if (__lsx_bz_b(check)) return 12; - // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte - // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but - // it is easier when we can assume they are all pairs. This version does - // not use the LUT, but 4 byte sequences are less common and the overhead - // of the extra memory access is less important than the early branch - // overhead in shorter sequences. - // Swap byte pairs // 10dddddd 10cccccc|10bbbbbb 11110aaa // 10cccccc 10dddddd|11110aaa 10bbbbbb @@ -64561,7 +60091,7 @@ size_t convert_masked_utf8_to_utf16(const char *input, __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xDC00E7C0)); // Generate unadjusted trail surrogate minus lowest 2 bits // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 - __m128i trail = __lsx_vbitsel_v(shift, swap, lsx_splat_u32(0x0000FF00)); + __m128i trail = __lsx_vbitsel_v(shift, swap, lsx_splat_u32(0x0000ff00)); // Insert low 2 bits of trail surrogate to magic number for later // 11011100 00000000 11100111 110000cc __m128i magic_with_low_2 = __lsx_vor_v(__lsx_vsrli_w(shift, 30), magic); @@ -64584,7 +60114,9 @@ size_t convert_masked_utf8_to_utf16(const char *input, if simdutf_constexpr (!match_system(big_endian)) { composed = lsx_swap_bytes(composed); } - __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + // __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + __lsx_vst(composed, reinterpret_cast(buffer), 0); + std::memcpy(utf16_output, buffer, 12); utf16_output += 6; // We 3 32-bit surrogate pairs. return 12; // We consumed 12 bytes. } @@ -64684,10 +60216,10 @@ size_t convert_masked_utf8_to_utf16(const char *input, return 12; } } -/* end file src/lasx/lasx_convert_utf8_to_utf16.cpp */ +/* end file src/lsx/lsx_convert_utf8_to_utf16.cpp */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lasx/lasx_convert_utf8_to_utf32.cpp */ +/* begin file src/lsx/lsx_convert_utf8_to_utf32.cpp */ // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the // end of the code points. Only the least significant 12 bits of the mask // are accessed. @@ -64714,19 +60246,8 @@ size_t convert_masked_utf8_to_utf32(const char *input, // We process in chunks of 16 bytes. // use fast implementation in src/simdutf/arm64/simd.h // Ideally the compiler can keep the tables in registers. - __m128i zero = __lsx_vldi(0); - __m128i in16low = __lsx_vilvl_b(zero, in); - __m128i in16high = __lsx_vilvh_b(zero, in); - __m128i in32_0 = __lsx_vilvl_h(zero, in16low); - __m128i in32_1 = __lsx_vilvh_h(zero, in16low); - __m128i in32_2 = __lsx_vilvl_h(zero, in16high); - __m128i in32_3 = __lsx_vilvh_h(zero, in16high); - - __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); - __lsx_vst(in32_1, reinterpret_cast(utf32_output), 16); - __lsx_vst(in32_2, reinterpret_cast(utf32_output), 32); - __lsx_vst(in32_3, reinterpret_cast(utf32_output), 48); - + simd8 temp{in}; + temp.store_ascii_as_utf32_tbl(utf32_out); utf32_output += 16; // We wrote 16 32-bit characters. return 16; // We consumed 16 bytes. } @@ -64755,7 +60276,7 @@ size_t convert_masked_utf8_to_utf32(const char *input, utf32_output += 6; return 12; // We consumed 12 bytes. } - // Either no fast path or an unimportant fast path. + /// Either no fast path or an unimportant fast path. const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex [input_utf8_end_of_code_point_mask][0]; @@ -64878,10 +60399,10 @@ size_t convert_masked_utf8_to_utf32(const char *input, return 12; } } -/* end file src/lasx/lasx_convert_utf8_to_utf32.cpp */ +/* end file src/lsx/lsx_convert_utf8_to_utf32.cpp */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_utf8_to_latin1.cpp */ +/* begin file src/lsx/lsx_convert_utf8_to_latin1.cpp */ size_t convert_masked_utf8_to_latin1(const char *input, uint64_t utf8_end_of_code_point_mask, char *&latin1_output) { @@ -64950,19 +60471,22 @@ size_t convert_masked_utf8_to_latin1(const char *input, // writing 8 bytes even though we only care about the first 6 bytes. __m128i latin1_packed = __lsx_vpickev_b(__lsx_vldi(0), composed); - __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + uint64_t buffer[2]; + // __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + __lsx_vst(latin1_packed, reinterpret_cast(buffer), 0); + std::memcpy(latin1_output, buffer, 6); latin1_output += 6; // We wrote 6 bytes. return consumed; } -/* end file src/lasx/lasx_convert_utf8_to_latin1.cpp */ +/* end file src/lsx/lsx_convert_utf8_to_latin1.cpp */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_utf16_to_latin1.cpp */ +/* begin file src/lsx/lsx_convert_utf16_to_latin1.cpp */ template std::pair -lasx_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { +lsx_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { const char16_t *end = buf + len; while (end - buf >= 16) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); @@ -64988,8 +60512,8 @@ lasx_convert_utf16_to_latin1(const char16_t *buf, size_t len, template std::pair -lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { +lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { const char16_t *start = buf; const char16_t *end = buf + len; while (end - buf >= 16) { @@ -65023,12 +60547,12 @@ lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output); } -/* end file src/lasx/lasx_convert_utf16_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/lasx/lasx_convert_utf16_to_utf8.cpp */ +/* end file src/lsx/lsx_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 +/* begin file src/lsx/lsx_convert_utf16_to_utf8.cpp */ /* - The vectorized algorithm works on single LASX register i.e., it + The vectorized algorithm works on single SSE register i.e., it loads eight 16-bit code units. We consider three cases: @@ -65046,7 +60570,7 @@ lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, char) or 2) two UTF8 bytes. For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole LASX register with a single + codes and finally compress the whole SSE register with a single shuffle. We need 256-entry lookup table to get a compression pattern @@ -65064,7 +60588,7 @@ lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, the three-UTF8-bytes case. Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two LASX registers. + array of 32-bit values. The array spans two SSE registers. The bytes from the registers are compressed using two shuffles. We need 256-entry lookup table to get a compression pattern @@ -65079,10 +60603,9 @@ lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, Returns a pair: the first unprocessed byte from buf and utf8_output A scalar routing should carry on the conversion of the tail. */ - template std::pair -lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { +lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { uint8_t *utf8_output = reinterpret_cast(utf8_out); const char16_t *end = buf + len; @@ -65090,77 +60613,82 @@ lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); - __m256i zero = __lasx_xvldi(0); - __m128i zero_128 = __lsx_vldi(0); + __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); if simdutf_constexpr (!match_system(big_endian)) { - in = lasx_swap_bytes(in); + in = lsx_swap_bytes(in); } - if (__lasx_xbnz_h(__lasx_xvslt_hu( - in, __lasx_xvrepli_h(0x7F)))) { // ASCII fast path!!!! - // 1. pack the bytes - __m256i utf8_packed = - __lasx_xvpermi_d(__lasx_xvpickev_b(in, in), 0b00001000); - // 2. store (16 bytes) - __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! + if (__lsx_bz_v( + __lsx_vslt_hu(__lsx_vrepli_h(0x7F), in))) { // ASCII fast path!!!! + // It is common enough that we have sequences of 16 consecutive ASCII + // characters. + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); + if simdutf_constexpr (!match_system(big_endian)) { + nextin = lsx_swap_bytes(nextin); + } + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), nextin))) { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(nextin, in); + // 2. store (16 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } else { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(in, in); + // 2. store (8 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } } - if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, in))) { + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, in))) { // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 16 - // expected output : [110a|aaaa|10bb|bbbb] x 16 + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 // t0 = [000a|aaaa|bbbb|bb00] - __m256i t0 = __lasx_xvslli_h(in, 2); + __m128i t0 = __lsx_vslli_h(in, 2); // t1 = [000a|aaaa|0000|0000] - __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); + __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); // t2 = [0000|0000|00bb|bbbb] - __m256i t2 = __lasx_xvand_v(in, __lasx_xvrepli_h(0x3f)); + __m128i t2 = __lsx_vand_v(in, __lsx_vrepli_h(0x3f)); // t3 = [000a|aaaa|00bb|bbbb] - __m256i t3 = __lasx_xvor_v(t1, t2); + __m128i t3 = __lsx_vor_v(t1, t2); // t4 = [110a|aaaa|10bb|bbbb] - __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xc080)); - __m256i t4 = __lasx_xvor_v(t3, v_c080); + __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xc080)); + __m128i t4 = __lsx_vor_v(t3, v_c080); // 2. merge ASCII and 2-byte codewords - __m256i one_byte_bytemask = - __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F /*0x007F*/)); - __m256i utf8_unpacked = __lasx_xvbitsel_v(t4, in, one_byte_bytemask); + __m128i one_byte_bytemask = + __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = __lsx_vbitsel_v(t4, in, one_byte_bytemask); // 3. prepare bitmask for 8-bit lookup - __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); - uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + uint32_t m2 = __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); // 4. pack the bytes - const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m1]][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_packed1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); - - const uint8_t *row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_packed2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); // 5. store bytes - __lsx_vst(utf8_packed1, utf8_output, 0); - utf8_output += row1[0]; - - __lsx_vst(utf8_packed2, utf8_output, 0); - utf8_output += row2[0]; - - buf += 16; + __lsx_vst(utf8_packed, utf8_output, 0); + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; continue; } - __m256i surrogates_bytemask = __lasx_xvseq_h( - __lasx_xvand_v(in, lasx_splat_u16(0xf800)), lasx_splat_u16(0xd800)); + __m128i surrogates_bytemask = __lsx_vseq_h( + __lsx_vand_v(in, lsx_splat_u16(0xf800)), lsx_splat_u16(0xd800)); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (__lasx_xbz_v(surrogates_bytemask)) { + if (__lsx_bz_v(surrogates_bytemask)) { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes /* In this branch we handle three cases: 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - @@ -65190,94 +60718,74 @@ lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m256i t0 = __lasx_xvpickev_b(in, in); - t0 = __lasx_xvilvl_b(t0, t0); + __m128i t0 = __lsx_vpickev_b(in, in); + t0 = __lsx_vilvl_b(t0, t0); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] - __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); - __m256i t1 = __lasx_xvand_v(t0, v_3f7f); + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); + __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m256i s0 = __lasx_xvsrli_h(in, 12); + __m128i s0 = __lsx_vsrli_h(in, 12); // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m256i s1 = __lasx_xvslli_h(in, 2); + __m128i s1 = __lsx_vslli_h(in, 2); // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] - s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); + s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3f00)); // [00bb|bbbb|0000|aaaa] - __m256i s2 = __lasx_xvor_v(s0, s1); + __m128i s2 = __lsx_vor_v(s0, s1); // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); - __m256i s3 = __lasx_xvor_v(s2, v_c0e0); - __m256i one_or_two_bytes_bytemask = __lasx_xvsle_hu(in, v_07ff); - __m256i m0 = - __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); - __m256i s4 = __lasx_xvxor_v(s3, m0); + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(in, v_07ff); + __m128i m0 = + __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); + __m128i s4 = __lsx_vxor_v(s3, m0); // 4. expand code units 16-bit => 32-bit - __m256i out0 = __lasx_xvilvl_h(s4, t2); - __m256i out1 = __lasx_xvilvh_h(s4, t2); + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m256i one_byte_bytemask = __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F)); - __m256i one_byte_bytemask_low = - __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); - __m256i one_byte_bytemask_high = - __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask = __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F)); - __m256i one_or_two_bytes_bytemask_low = - __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); - __m256i one_or_two_bytes_bytemask_high = - __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); - __m256i mask0 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_low, one_byte_bytemask_low)); - __m256i mask1 = __lasx_xvmskltz_h(__lasx_xvor_v( - one_or_two_bytes_bytemask_high, one_byte_bytemask_high)); + __m128i one_byte_bytemask_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + + const uint32_t mask0 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_low, + one_byte_bytemask_low)), + 0); + const uint32_t mask1 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_high, + one_byte_bytemask_high)), + 0); - uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); - mask = __lasx_xvpickve2gr_wu(mask1, 0); const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; __lsx_vst(utf8_1, utf8_output, 0); utf8_output += row1[0]; - mask = __lasx_xvpickve2gr_wu(mask0, 4); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); - __lsx_vst(utf8_2, utf8_output, 0); - utf8_output += row2[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 4); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle3 = __lsx_vld(row3, 1); - __m128i utf8_3 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); - __lsx_vst(utf8_3, utf8_output, 0); - utf8_output += row3[0]; - - buf += 16; + buf += 8; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -65332,8 +60840,8 @@ lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { */ template std::pair -lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, - char *utf8_out) { +lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_out) { uint8_t *utf8_output = reinterpret_cast(utf8_out); const char16_t *start = buf; const char16_t *end = buf + len; @@ -65341,78 +60849,82 @@ lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - - __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); - __m256i zero = __lasx_xvldi(0); - __m128i zero_128 = __lsx_vldi(0); while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); if simdutf_constexpr (!match_system(big_endian)) { - in = lasx_swap_bytes(in); + in = lsx_swap_bytes(in); } - if (__lasx_xbnz_h(__lasx_xvslt_hu( - in, __lasx_xvrepli_h(0x7F)))) { // ASCII fast path!!!! - // 1. pack the bytes - __m256i utf8_packed = - __lasx_xvpermi_d(__lasx_xvpickev_b(in, in), 0b00001000); - // 2. store (16 bytes) - __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! + if (__lsx_bz_v( + __lsx_vslt_hu(__lsx_vrepli_h(0x7F), in))) { // ASCII fast path!!!! + // It is common enough that we have sequences of 16 consecutive ASCII + // characters. + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); + if simdutf_constexpr (!match_system(big_endian)) { + nextin = lsx_swap_bytes(nextin); + } + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), nextin))) { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(nextin, in); + // 2. store (16 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } else { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(in, in); + // 2. store (8 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } } - if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, in))) { + __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, in))) { // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 16 - // expected output : [110a|aaaa|10bb|bbbb] x 16 + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 // t0 = [000a|aaaa|bbbb|bb00] - __m256i t0 = __lasx_xvslli_h(in, 2); + __m128i t0 = __lsx_vslli_h(in, 2); // t1 = [000a|aaaa|0000|0000] - __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); + __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); // t2 = [0000|0000|00bb|bbbb] - __m256i t2 = __lasx_xvand_v(in, __lasx_xvrepli_h(0x3f)); + __m128i t2 = __lsx_vand_v(in, __lsx_vrepli_h(0x3f)); // t3 = [000a|aaaa|00bb|bbbb] - __m256i t3 = __lasx_xvor_v(t1, t2); + __m128i t3 = __lsx_vor_v(t1, t2); // t4 = [110a|aaaa|10bb|bbbb] - __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xc080)); - __m256i t4 = __lasx_xvor_v(t3, v_c080); + __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xc080)); + __m128i t4 = __lsx_vor_v(t3, v_c080); // 2. merge ASCII and 2-byte codewords - __m256i one_byte_bytemask = - __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F /*0x007F*/)); - __m256i utf8_unpacked = __lasx_xvbitsel_v(t4, in, one_byte_bytemask); + __m128i one_byte_bytemask = + __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = __lsx_vbitsel_v(t4, in, one_byte_bytemask); // 3. prepare bitmask for 8-bit lookup - __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); - uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + uint32_t m2 = __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); // 4. pack the bytes - const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m1]][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_packed1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); - - const uint8_t *row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_packed2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); // 5. store bytes - __lsx_vst(utf8_packed1, utf8_output, 0); - utf8_output += row1[0]; - - __lsx_vst(utf8_packed2, utf8_output, 0); - utf8_output += row2[0]; - - buf += 16; + __lsx_vst(utf8_packed, utf8_output, 0); + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; continue; } - __m256i surrogates_bytemask = __lasx_xvseq_h( - __lasx_xvand_v(in, lasx_splat_u16(0xf800)), lasx_splat_u16(0xd800)); + __m128i surrogates_bytemask = __lsx_vseq_h( + __lsx_vand_v(in, lsx_splat_u16(0xf800)), lsx_splat_u16(0xd800)); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (__lasx_xbz_v(surrogates_bytemask)) { + if (__lsx_bz_v(surrogates_bytemask)) { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes /* In this branch we handle three cases: 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - @@ -65442,94 +60954,74 @@ lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m256i t0 = __lasx_xvpickev_b(in, in); - t0 = __lasx_xvilvl_b(t0, t0); + __m128i t0 = __lsx_vpickev_b(in, in); + t0 = __lsx_vilvl_b(t0, t0); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] - __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); - __m256i t1 = __lasx_xvand_v(t0, v_3f7f); + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); + __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m256i s0 = __lasx_xvsrli_h(in, 12); + __m128i s0 = __lsx_vsrli_h(in, 12); // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m256i s1 = __lasx_xvslli_h(in, 2); + __m128i s1 = __lsx_vslli_h(in, 2); // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] - s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); + s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3f00)); // [00bb|bbbb|0000|aaaa] - __m256i s2 = __lasx_xvor_v(s0, s1); + __m128i s2 = __lsx_vor_v(s0, s1); // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); - __m256i s3 = __lasx_xvor_v(s2, v_c0e0); - __m256i one_or_two_bytes_bytemask = __lasx_xvsle_hu(in, v_07ff); - __m256i m0 = - __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); - __m256i s4 = __lasx_xvxor_v(s3, m0); + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(in, v_07ff); + __m128i m0 = + __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); + __m128i s4 = __lsx_vxor_v(s3, m0); // 4. expand code units 16-bit => 32-bit - __m256i out0 = __lasx_xvilvl_h(s4, t2); - __m256i out1 = __lasx_xvilvh_h(s4, t2); + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m256i one_byte_bytemask = __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F)); - __m256i one_byte_bytemask_low = - __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); - __m256i one_byte_bytemask_high = - __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask = __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F)); - __m256i one_or_two_bytes_bytemask_low = - __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); - __m256i one_or_two_bytes_bytemask_high = - __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); - __m256i mask0 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_low, one_byte_bytemask_low)); - __m256i mask1 = __lasx_xvmskltz_h(__lasx_xvor_v( - one_or_two_bytes_bytemask_high, one_byte_bytemask_high)); + __m128i one_byte_bytemask_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + + const uint32_t mask0 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_low, + one_byte_bytemask_low)), + 0); + const uint32_t mask1 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_high, + one_byte_bytemask_high)), + 0); - uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); - mask = __lasx_xvpickve2gr_wu(mask1, 0); const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; __lsx_vst(utf8_1, utf8_output, 0); utf8_output += row1[0]; - mask = __lasx_xvpickve2gr_wu(mask0, 4); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); - __lsx_vst(utf8_2, utf8_output, 0); - utf8_output += row2[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 4); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle3 = __lsx_vld(row3, 1); - __m128i utf8_3 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); - __lsx_vst(utf8_3, utf8_output, 0); - utf8_output += row3[0]; - - buf += 16; + buf += 8; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -65577,63 +61069,38 @@ lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf8_output)); } -/* end file src/lasx/lasx_convert_utf16_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* end file src/lsx/lsx_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lasx/lasx_convert_utf16_to_utf32.cpp */ +/* begin file src/lsx/lsx_convert_utf16_to_utf32.cpp */ template std::pair -lasx_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_out) { +lsx_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_out) { uint32_t *utf32_output = reinterpret_cast(utf32_out); const char16_t *end = buf + len; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)utf32_output & 0x1f) && buf < end) { - uint16_t word = scalar::utf16::swap_if_needed(buf[0]); - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - buf++; - } else { - if (buf + 1 >= end) { - return std::make_pair(nullptr, - reinterpret_cast(utf32_output)); - } - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = scalar::utf16::swap_if_needed(buf[1]); - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - buf += 2; - } - } - - __m256i v_f800 = lasx_splat_u16(0xf800); - __m256i v_d800 = lasx_splat_u16(0xd800); + __m128i zero = __lsx_vldi(0); + __m128i v_f800 = lsx_splat_u16(0xf800); + __m128i v_d800 = lsx_splat_u16(0xd800); - while (end - buf >= 16) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + while (end - buf >= 8) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); if simdutf_constexpr (!match_system(big_endian)) { - in = lasx_swap_bytes(in); + in = lsx_swap_bytes(in); } - __m256i surrogates_bytemask = - __lasx_xvseq_h(__lasx_xvand_v(in, v_f800), v_d800); + __m128i surrogates_bytemask = + __lsx_vseq_h(__lsx_vand_v(in, v_f800), v_d800); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (__lasx_xbz_v(surrogates_bytemask)) { + if (__lsx_bz_v(surrogates_bytemask)) { // case: no surrogate pairs, extend all 16-bit code units to 32-bit code // units - __m256i in_hi = __lasx_xvpermi_q(in, in, 0b00000001); - __lasx_xvst(__lasx_vext2xv_wu_hu(in), utf32_output, 0); - __lasx_xvst(__lasx_vext2xv_wu_hu(in_hi), utf32_output, 32); - utf32_output += 16; - buf += 16; + __lsx_vst(__lsx_vilvl_h(zero, in), utf32_output, 0); + __lsx_vst(__lsx_vilvh_h(zero, in), utf32_output, 16); + utf32_output += 8; + buf += 8; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -65678,56 +61145,31 @@ lasx_convert_utf16_to_utf32(const char16_t *buf, size_t len, */ template std::pair -lasx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_out) { +lsx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_out) { uint32_t *utf32_output = reinterpret_cast(utf32_out); const char16_t *start = buf; const char16_t *end = buf + len; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)utf32_output & 0x1f) && buf < end) { - uint16_t word = scalar::utf16::swap_if_needed(buf[0]); - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - buf++; - } else if (buf + 1 < end) { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = scalar::utf16::swap_if_needed(buf[1]); - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - buf += 2; - } else { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf32_output)); - } - } + __m128i zero = __lsx_vldi(0); + __m128i v_f800 = lsx_splat_u16(0xf800); + __m128i v_d800 = lsx_splat_u16(0xd800); - __m256i v_f800 = lasx_splat_u16(0xf800); - __m256i v_d800 = lasx_splat_u16(0xd800); - while (end - buf >= 16) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + while (end - buf >= 8) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); if simdutf_constexpr (!match_system(big_endian)) { - in = lasx_swap_bytes(in); + in = lsx_swap_bytes(in); } - __m256i surrogates_bytemask = - __lasx_xvseq_h(__lasx_xvand_v(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (__lasx_xbz_v(surrogates_bytemask)) { + __m128i surrogates_bytemask = + __lsx_vseq_h(__lsx_vand_v(in, v_f800), v_d800); + if (__lsx_bz_v(surrogates_bytemask)) { // case: no surrogate pairs, extend all 16-bit code units to 32-bit code // units - __m256i in_hi = __lasx_xvpermi_q(in, in, 0b00000001); - __lasx_xvst(__lasx_vext2xv_wu_hu(in), utf32_output, 0); - __lasx_xvst(__lasx_vext2xv_wu_hu(in_hi), utf32_output, 32); - utf32_output += 16; - buf += 16; + __lsx_vst(__lsx_vilvl_h(zero, in), utf32_output, 0); + __lsx_vst(__lsx_vilvh_h(zero, in), utf32_output, 16); + utf32_output += 8; + buf += 8; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -65764,35 +61206,31 @@ lasx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf32_output)); } -/* end file src/lasx/lasx_convert_utf16_to_utf32.cpp */ +/* end file src/lsx/lsx_convert_utf16_to_utf32.cpp */ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_utf32_to_latin1.cpp */ +/* begin file src/lsx/lsx_convert_utf32_to_latin1.cpp */ std::pair -lasx_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { +lsx_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { const char32_t *end = buf + len; - const __m256i shuf_mask = ____m256i( - (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); - __m256i v_ff = __lasx_xvrepli_w(0xFF); + const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; + __m128i v_ff = __lsx_vrepli_w(0xFF); while (end - buf >= 16) { - __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); - __m256i in12 = __lasx_xvor_v(in1, in2); - if (__lasx_xbz_v(__lasx_xvslt_wu(v_ff, in12))) { + __m128i in12 = __lsx_vor_v(in1, in2); + if (__lsx_bz_v(__lsx_vslt_wu(v_ff, in12))) { // 1. pack the bytes - __m256i latin1_packed_tmp = __lasx_xvshuf_b(in2, in1, shuf_mask); - latin1_packed_tmp = __lasx_xvpermi_d(latin1_packed_tmp, 0b00001000); - __m128i latin1_packed = lasx_extracti128_lo(latin1_packed_tmp); - latin1_packed = __lsx_vpermi_w(latin1_packed, latin1_packed, 0b11011000); + __m128i latin1_packed = __lsx_vshuf_b(in2, in1, (__m128i)shuf_mask); // 2. store (8 bytes) __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); // 3. adjust pointers - buf += 16; - latin1_output += 16; + buf += 8; + latin1_output += 8; } else { return std::make_pair(nullptr, reinterpret_cast(latin1_output)); } @@ -65801,34 +61239,31 @@ lasx_convert_utf32_to_latin1(const char32_t *buf, size_t len, } std::pair -lasx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { +lsx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { const char32_t *start = buf; const char32_t *end = buf + len; - const __m256i shuf_mask = ____m256i( - (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); - __m256i v_ff = __lasx_xvrepli_w(0xFF); + const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; + __m128i v_ff = __lsx_vrepli_w(0xFF); while (end - buf >= 16) { - __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); - __m256i in12 = __lasx_xvor_v(in1, in2); - if (__lasx_xbz_v(__lasx_xvslt_wu(v_ff, in12))) { + __m128i in12 = __lsx_vor_v(in1, in2); + + if (__lsx_bz_v(__lsx_vslt_wu(v_ff, in12))) { // 1. pack the bytes - __m256i latin1_packed_tmp = __lasx_xvshuf_b(in2, in1, shuf_mask); - latin1_packed_tmp = __lasx_xvpermi_d(latin1_packed_tmp, 0b00001000); - __m128i latin1_packed = lasx_extracti128_lo(latin1_packed_tmp); - latin1_packed = __lsx_vpermi_w(latin1_packed, latin1_packed, 0b11011000); + __m128i latin1_packed = __lsx_vshuf_b(in2, in1, (__m128i)shuf_mask); // 2. store (8 bytes) __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); // 3. adjust pointers - buf += 16; - latin1_output += 16; + buf += 8; + latin1_output += 8; } else { // Let us do a scalar fallback. - for (int k = 0; k < 16; k++) { + for (int k = 0; k < 8; k++) { uint32_t word = buf[k]; if (word <= 0xff) { *latin1_output++ = char(word); @@ -65842,253 +61277,188 @@ lasx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output); } -/* end file src/lasx/lasx_convert_utf32_to_latin1.cpp */ +/* end file src/lsx/lsx_convert_utf32_to_latin1.cpp */ #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lasx/lasx_convert_utf32_to_utf8.cpp */ +/* begin file src/lsx/lsx_convert_utf32_to_utf8.cpp */ std::pair -lasx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { +lsx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { uint8_t *utf8_output = reinterpret_cast(utf8_out); const char32_t *end = buf + len; - // load addr align 32 - while (((uint64_t)buf & 0x1F) && buf < end) { - uint32_t word = *buf; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair(nullptr, reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - buf++; - } - - __m256i v_c080 = lasx_splat_u16(0xc080); - __m256i v_07ff = lasx_splat_u16(0x07ff); - __m256i v_dfff = lasx_splat_u16(0xdfff); - __m256i v_d800 = lasx_splat_u16(0xd800); - __m256i zero = __lasx_xvldi(0); - __m128i zero_128 = __lsx_vldi(0); - __m256i forbidden_bytemask = __lasx_xvldi(0x0); + __m128i v_c080 = lsx_splat_u16(0xc080); + __m128i v_07ff = lsx_splat_u16(0x07ff); + __m128i v_dfff = lsx_splat_u16(0xdfff); + __m128i v_d800 = lsx_splat_u16(0xd800); + __m128i forbidden_bytemask = __lsx_vldi(0x0); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 while (end - buf > std::ptrdiff_t(16 + safety_margin)) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); // Check if no bits set above 16th - if (__lasx_xbz_v(__lasx_xvpickod_h(in, nextin))) { + if (__lsx_bz_v(__lsx_vpickod_h(in, nextin))) { // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (lasx_convert_utf16_to_utf8.cpp) - __m256i utf16_packed = - __lasx_xvpermi_d(__lasx_xvpickev_h(nextin, in), 0b11011000); + // Apply UTF-16 => UTF-8 routine (lsx_convert_utf16_to_utf8.cpp) + __m128i utf16_packed = __lsx_vpickev_h(nextin, in); - if (__lasx_xbz_v(__lasx_xvslt_hu(__lasx_xvrepli_h(0x7F), - utf16_packed))) { // ASCII fast path!!!! + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! // 1. pack the bytes // obviously suboptimal. - __m256i utf8_packed = __lasx_xvpermi_d( - __lasx_xvpickev_b(utf16_packed, utf16_packed), 0b00001000); + __m128i utf8_packed = __lsx_vpickev_b(utf16_packed, utf16_packed); // 2. store (8 bytes) - __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + __lsx_vst(utf8_packed, utf8_output, 0); // 3. adjust pointers - buf += 16; - utf8_output += 16; + buf += 8; + utf8_output += 8; continue; // we are done for this round! } - - if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, utf16_packed))) { + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, utf16_packed))) { // 1. prepare 2-byte values // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 // expected output : [110a|aaaa|10bb|bbbb] x 8 // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = __lasx_xvslli_h(utf16_packed, 2); + const __m128i t0 = __lsx_vslli_h(utf16_packed, 2); // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); + const __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = __lasx_xvand_v(utf16_packed, __lasx_xvrepli_h(0x3f)); + const __m128i t2 = __lsx_vand_v(utf16_packed, __lsx_vrepli_h(0x3f)); // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = __lasx_xvor_v(t1, t2); + const __m128i t3 = __lsx_vor_v(t1, t2); // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = __lasx_xvor_v(t3, v_c080); + const __m128i t4 = __lsx_vor_v(t3, v_c080); // 2. merge ASCII and 2-byte codewords - __m256i one_byte_bytemask = - __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F /*0x007F*/)); - __m256i utf8_unpacked = - __lasx_xvbitsel_v(t4, utf16_packed, one_byte_bytemask); + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = + __lsx_vbitsel_v(t4, utf16_packed, one_byte_bytemask); // 3. prepare bitmask for 8-bit lookup - __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); - uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + uint32_t m2 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); // 4. pack the bytes - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m1]][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_packed1 = __lsx_vshuf_b( - zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); - - const uint8_t *row2 = + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_packed2 = __lsx_vshuf_b( - zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); // 5. store bytes - __lsx_vst(utf8_packed1, utf8_output, 0); - utf8_output += row1[0]; - - __lsx_vst(utf8_packed2, utf8_output, 0); - utf8_output += row2[0]; + __lsx_vst(utf8_packed, utf8_output, 0); - buf += 16; + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; continue; } else { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - forbidden_bytemask = __lasx_xvor_v( - __lasx_xvand_v( - __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 forbidden_bytemask); /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single + UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three + UTF-8 bytes - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. - We precompute byte 1 for case #1 and the common byte for cases #2 & - #3 in register t2. + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ /** * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: * t2 => [0ccc|cccc] [10cc|cccc] * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m256i t0 = __lasx_xvpickev_b(utf16_packed, utf16_packed); - t0 = __lasx_xvilvl_b(t0, t0); + __m128i t0 = __lsx_vpickev_b(utf16_packed, utf16_packed); + t0 = __lsx_vilvl_b(t0, t0); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); - __m256i t1 = __lasx_xvand_v(t0, v_3f7f); + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); + __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m256i s0 = __lasx_xvsrli_h(utf16_packed, 12); + __m128i s0 = __lsx_vsrli_h(utf16_packed, 12); // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m256i s1 = __lasx_xvslli_h(utf16_packed, 2); + __m128i s1 = __lsx_vslli_h(utf16_packed, 2); // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); + s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3F00)); // [00bb|bbbb|0000|aaaa] - __m256i s2 = __lasx_xvor_v(s0, s1); + __m128i s2 = __lsx_vor_v(s0, s1); // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); - __m256i s3 = __lasx_xvor_v(s2, v_c0e0); - // __m256i v_07ff = vmovq_n_u16((uint16_t)0x07FF); - __m256i one_or_two_bytes_bytemask = - __lasx_xvsle_hu(utf16_packed, v_07ff); - __m256i m0 = - __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); - __m256i s4 = __lasx_xvxor_v(s3, m0); + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(utf16_packed, v_07ff); + __m128i m0 = + __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); + __m128i s4 = __lsx_vxor_v(s3, m0); // 4. expand code units 16-bit => 32-bit - __m256i out0 = __lasx_xvilvl_h(s4, t2); - __m256i out1 = __lasx_xvilvh_h(s4, t2); + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m256i one_byte_bytemask = - __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F)); + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F)); - __m256i one_or_two_bytes_bytemask_u16_to_u32_low = - __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); - __m256i one_or_two_bytes_bytemask_u16_to_u32_high = - __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); - __m256i one_byte_bytemask_u16_to_u32_low = - __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); - __m256i one_byte_bytemask_u16_to_u32_high = - __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); - __m256i mask0 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_low, - one_byte_bytemask_u16_to_u32_low)); - __m256i mask1 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_high, - one_byte_bytemask_u16_to_u32_high)); + const uint32_t mask0 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)), + 0); + const uint32_t mask1 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)), + 0); - uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); - mask = __lasx_xvpickve2gr_wu(mask1, 0); const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; __lsx_vst(utf8_1, utf8_output, 0); utf8_output += row1[0]; - mask = __lasx_xvpickve2gr_wu(mask0, 4); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); - __lsx_vst(utf8_2, utf8_output, 0); - utf8_output += row2[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 4); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle3 = __lsx_vld(row3, 1); - __m128i utf8_3 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); - __lsx_vst(utf8_3, utf8_output, 0); - utf8_output += row3[0]; - - buf += 16; + buf += 8; } // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> // will produce four UTF-8 bytes. @@ -66132,262 +61502,197 @@ lasx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { } // while // check for invalid input - if (__lasx_xbnz_v(forbidden_bytemask)) { + if (__lsx_bnz_v(forbidden_bytemask)) { return std::make_pair(nullptr, reinterpret_cast(utf8_output)); } + return std::make_pair(buf, reinterpret_cast(utf8_output)); } std::pair -lasx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_out) { +lsx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_out) { uint8_t *utf8_output = reinterpret_cast(utf8_out); const char32_t *start = buf; const char32_t *end = buf + len; - // load addr align 32 - while (((uint64_t)buf & 0x1F) && buf < end) { - uint32_t word = *buf; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - buf++; - } - - __m256i v_c080 = lasx_splat_u16(0xc080); - __m256i v_07ff = lasx_splat_u16(0x07ff); - __m256i v_dfff = lasx_splat_u16(0xdfff); - __m256i v_d800 = lasx_splat_u16(0xd800); - __m256i zero = __lasx_xvldi(0); - __m128i zero_128 = __lsx_vldi(0); - __m256i forbidden_bytemask = __lasx_xvldi(0x0); + __m128i v_c080 = lsx_splat_u16(0xc080); + __m128i v_07ff = lsx_splat_u16(0x07ff); + __m128i v_dfff = lsx_splat_u16(0xdfff); + __m128i v_d800 = lsx_splat_u16(0xd800); + __m128i forbidden_bytemask = __lsx_vldi(0x0); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 while (end - buf > std::ptrdiff_t(16 + safety_margin)) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); // Check if no bits set above 16th - if (__lasx_xbz_v(__lasx_xvpickod_h(in, nextin))) { + if (__lsx_bz_v(__lsx_vpickod_h(in, nextin))) { // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (lasx_convert_utf16_to_utf8.cpp) - __m256i utf16_packed = - __lasx_xvpermi_d(__lasx_xvpickev_h(nextin, in), 0b11011000); + // Apply UTF-16 => UTF-8 routine (lsx_convert_utf16_to_utf8.cpp) + __m128i utf16_packed = __lsx_vpickev_h(nextin, in); - if (__lasx_xbz_v(__lasx_xvslt_hu(__lasx_xvrepli_h(0x7F), - utf16_packed))) { // ASCII fast path!!!! + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! // 1. pack the bytes // obviously suboptimal. - __m256i utf8_packed = __lasx_xvpermi_d( - __lasx_xvpickev_b(utf16_packed, utf16_packed), 0b00001000); + __m128i utf8_packed = __lsx_vpickev_b(utf16_packed, utf16_packed); // 2. store (8 bytes) - __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + __lsx_vst(utf8_packed, utf8_output, 0); // 3. adjust pointers - buf += 16; - utf8_output += 16; + buf += 8; + utf8_output += 8; continue; // we are done for this round! } - - if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, utf16_packed))) { + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, utf16_packed))) { // 1. prepare 2-byte values // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 // expected output : [110a|aaaa|10bb|bbbb] x 8 // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = __lasx_xvslli_h(utf16_packed, 2); + const __m128i t0 = __lsx_vslli_h(utf16_packed, 2); // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); + const __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = __lasx_xvand_v(utf16_packed, __lasx_xvrepli_h(0x3f)); + const __m128i t2 = __lsx_vand_v(utf16_packed, __lsx_vrepli_h(0x3f)); // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = __lasx_xvor_v(t1, t2); + const __m128i t3 = __lsx_vor_v(t1, t2); // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = __lasx_xvor_v(t3, v_c080); + const __m128i t4 = __lsx_vor_v(t3, v_c080); // 2. merge ASCII and 2-byte codewords - __m256i one_byte_bytemask = - __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F /*0x007F*/)); - __m256i utf8_unpacked = - __lasx_xvbitsel_v(t4, utf16_packed, one_byte_bytemask); + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = + __lsx_vbitsel_v(t4, utf16_packed, one_byte_bytemask); // 3. prepare bitmask for 8-bit lookup - __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); - uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + uint32_t m2 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); // 4. pack the bytes - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m1]][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_packed1 = __lsx_vshuf_b( - zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); - - const uint8_t *row2 = + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_packed2 = __lsx_vshuf_b( - zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); // 5. store bytes - __lsx_vst(utf8_packed1, utf8_output, 0); - utf8_output += row1[0]; - - __lsx_vst(utf8_packed2, utf8_output, 0); - utf8_output += row2[0]; + __lsx_vst(utf8_packed, utf8_output, 0); - buf += 16; + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; continue; } else { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - forbidden_bytemask = __lasx_xvor_v( - __lasx_xvand_v( - __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 forbidden_bytemask); - if (__lasx_xbnz_v(forbidden_bytemask)) { + if (__lsx_bnz_v(forbidden_bytemask)) { return std::make_pair(result(error_code::SURROGATE, buf - start), reinterpret_cast(utf8_output)); } /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single + UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three + UTF-8 bytes - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. - We precompute byte 1 for case #1 and the common byte for cases #2 & - #3 in register t2. + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ /** * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: * t2 => [0ccc|cccc] [10cc|cccc] * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m256i t0 = __lasx_xvpickev_b(utf16_packed, utf16_packed); - t0 = __lasx_xvilvl_b(t0, t0); + __m128i t0 = __lsx_vpickev_b(utf16_packed, utf16_packed); + t0 = __lsx_vilvl_b(t0, t0); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); - __m256i t1 = __lasx_xvand_v(t0, v_3f7f); + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); + __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m256i s0 = __lasx_xvsrli_h(utf16_packed, 12); + __m128i s0 = __lsx_vsrli_h(utf16_packed, 12); // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m256i s1 = __lasx_xvslli_h(utf16_packed, 2); + __m128i s1 = __lsx_vslli_h(utf16_packed, 2); // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3F00)); + s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3F00)); // [00bb|bbbb|0000|aaaa] - __m256i s2 = __lasx_xvor_v(s0, s1); + __m128i s2 = __lsx_vor_v(s0, s1); // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); - __m256i s3 = __lasx_xvor_v(s2, v_c0e0); - // __m256i v_07ff = vmovq_n_u16((uint16_t)0x07FF); - __m256i one_or_two_bytes_bytemask = - __lasx_xvsle_hu(utf16_packed, v_07ff); - __m256i m0 = - __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); - __m256i s4 = __lasx_xvxor_v(s3, m0); + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + // __m128i v_07ff = vmovq_n_u16((uint16_t)0x07FF); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(utf16_packed, v_07ff); + __m128i m0 = + __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); + __m128i s4 = __lsx_vxor_v(s3, m0); // 4. expand code units 16-bit => 32-bit - __m256i out0 = __lasx_xvilvl_h(s4, t2); - __m256i out1 = __lasx_xvilvh_h(s4, t2); + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m256i one_byte_bytemask = - __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F)); + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F)); - __m256i one_or_two_bytes_bytemask_u16_to_u32_low = - __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); - __m256i one_or_two_bytes_bytemask_u16_to_u32_high = - __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); - __m256i one_byte_bytemask_u16_to_u32_low = - __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); - __m256i one_byte_bytemask_u16_to_u32_high = - __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); - __m256i mask0 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_low, - one_byte_bytemask_u16_to_u32_low)); - __m256i mask1 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_high, - one_byte_bytemask_u16_to_u32_high)); + const uint32_t mask0 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)), + 0); + const uint32_t mask1 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)), + 0); - uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); - mask = __lasx_xvpickve2gr_wu(mask1, 0); const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; __lsx_vst(utf8_1, utf8_output, 0); utf8_output += row1[0]; - mask = __lasx_xvpickve2gr_wu(mask0, 4); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); - __lsx_vst(utf8_2, utf8_output, 0); - utf8_output += row2[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 4); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle3 = __lsx_vld(row3, 1); - __m128i utf8_3 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); - __lsx_vst(utf8_3, utf8_output, 0); - utf8_output += row3[0]; - - buf += 16; + buf += 8; } // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> // will produce four UTF-8 bytes. @@ -66435,74 +61740,41 @@ lasx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf8_output)); } -/* end file src/lasx/lasx_convert_utf32_to_utf8.cpp */ +/* end file src/lsx/lsx_convert_utf32_to_utf8.cpp */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lasx/lasx_convert_utf32_to_utf16.cpp */ +/* begin file src/lsx/lsx_convert_utf32_to_utf16.cpp */ template std::pair -lasx_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_out) { +lsx_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_out) { uint16_t *utf16_output = reinterpret_cast(utf16_out); const char32_t *end = buf + len; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)utf16_output & 0x1F) && buf < end) { - uint32_t word = *buf++; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); - // buf++; - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if simdutf_constexpr (!match_system(big_endian)) { - high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - // buf++; - } - } - - __m256i forbidden_bytemask = __lasx_xvrepli_h(0); - __m256i v_d800 = lasx_splat_u16(0xd800); - __m256i v_dfff = lasx_splat_u16(0xdfff); - while (end - buf >= 16) { - __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); + __m128i forbidden_bytemask = __lsx_vrepli_h(0); + __m128i v_d800 = lsx_splat_u16(0xd800); + __m128i v_dfff = lsx_splat_u16(0xdfff); + while (end - buf >= 8) { + __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); // Check if no bits set above 16th - if (__lasx_xbz_v(__lasx_xvpickod_h(in1, in0))) { - __m256i utf16_packed = - __lasx_xvpermi_d(__lasx_xvpickev_h(in1, in0), 0b11011000); - forbidden_bytemask = __lasx_xvor_v( - __lasx_xvand_v( - __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + if (__lsx_bz_v(__lsx_vpickod_h(in1, in0))) { + __m128i utf16_packed = __lsx_vpickev_h(in1, in0); + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 forbidden_bytemask); if simdutf_constexpr (!match_system(big_endian)) { - utf16_packed = lasx_swap_bytes(utf16_packed); + utf16_packed = lsx_swap_bytes(utf16_packed); } - __lasx_xvst(utf16_packed, utf16_output, 0); - utf16_output += 16; - buf += 16; + __lsx_vst(utf16_packed, utf16_output, 0); + utf16_output += 8; + buf += 8; } else { - size_t forward = 15; + size_t forward = 3; size_t k = 0; if (size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1); @@ -66541,7 +61813,7 @@ lasx_convert_utf32_to_utf16(const char32_t *buf, size_t len, } // check for invalid input - if (__lasx_xbnz_v(forbidden_bytemask)) { + if (__lsx_bnz_v(forbidden_bytemask)) { return std::make_pair(nullptr, reinterpret_cast(utf16_output)); } return std::make_pair(buf, reinterpret_cast(utf16_output)); @@ -66549,72 +61821,42 @@ lasx_convert_utf32_to_utf16(const char32_t *buf, size_t len, template std::pair -lasx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_out) { +lsx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_out) { uint16_t *utf16_output = reinterpret_cast(utf16_out); const char32_t *start = buf; const char32_t *end = buf + len; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)utf16_output & 0x1F) && buf < end) { - uint32_t word = *buf++; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(result(error_code::SURROGATE, buf - start - 1), - reinterpret_cast(utf16_output)); - } - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair(result(error_code::TOO_LARGE, buf - start - 1), - reinterpret_cast(utf16_output)); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if simdutf_constexpr (!match_system(big_endian)) { - high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - - __m256i forbidden_bytemask = __lasx_xvrepli_h(0); - __m256i v_d800 = lasx_splat_u16(0xd800); - __m256i v_dfff = lasx_splat_u16(0xdfff); - while (end - buf >= 16) { - __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); + __m128i forbidden_bytemask = __lsx_vrepli_h(0); + __m128i v_d800 = lsx_splat_u16(0xd800); + __m128i v_dfff = lsx_splat_u16(0xdfff); + while (end - buf >= 8) { + __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); // Check if no bits set above 16th - if (__lasx_xbz_v(__lasx_xvpickod_h(in1, in0))) { - __m256i utf16_packed = - __lasx_xvpermi_d(__lasx_xvpickev_h(in1, in0), 0b11011000); - forbidden_bytemask = __lasx_xvor_v( - __lasx_xvand_v( - __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + if (__lsx_bz_v(__lsx_vpickod_h(in1, in0))) { + __m128i utf16_packed = __lsx_vpickev_h(in1, in0); + + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 forbidden_bytemask); - if (__lasx_xbnz_v(forbidden_bytemask)) { + if (__lsx_bnz_v(forbidden_bytemask)) { return std::make_pair(result(error_code::SURROGATE, buf - start), reinterpret_cast(utf16_output)); } if simdutf_constexpr (!match_system(big_endian)) { - utf16_packed = lasx_swap_bytes(utf16_packed); + utf16_packed = lsx_swap_bytes(utf16_packed); } - __lasx_xvst(utf16_packed, utf16_output, 0); - utf16_output += 16; - buf += 16; + __lsx_vst(utf16_packed, utf16_output, 0); + utf16_output += 8; + buf += 8; } else { - size_t forward = 15; + size_t forward = 3; size_t k = 0; if (size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1); @@ -66657,10 +61899,10 @@ lasx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf16_output)); } -/* end file src/lasx/lasx_convert_utf32_to_utf16.cpp */ +/* end file src/lsx/lsx_convert_utf32_to_utf16.cpp */ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 #if SIMDUTF_FEATURE_BASE64 -/* begin file src/lasx/lasx_base64.cpp */ +/* begin file src/lsx/lsx_base64.cpp */ /** * References and further reading: * @@ -66701,118 +61943,100 @@ size_t encode_base64(char *dst, const char *src, size_t srclen, : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; uint8_t *out = (uint8_t *)dst; - v32u8 shuf; - __m256i v_fc0fc00, v_3f03f0, shift_r, shift_l, base64_tbl0, base64_tbl1, + v16u8 shuf; + __m128i v_fc0fc00, v_3f03f0, shift_r, shift_l, base64_tbl0, base64_tbl1, base64_tbl2, base64_tbl3; - if (srclen >= 28) { - shuf = v32u8{1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10, - 1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10}; - - v_fc0fc00 = __lasx_xvreplgr2vr_w(uint32_t(0x0fc0fc00)); - v_3f03f0 = __lasx_xvreplgr2vr_w(uint32_t(0x003f03f0)); - shift_r = __lasx_xvreplgr2vr_w(uint32_t(0x0006000a)); - shift_l = __lasx_xvreplgr2vr_w(uint32_t(0x00080004)); - base64_tbl0 = ____m256i(__lsx_vld(lookup_tbl, 0)); - base64_tbl1 = ____m256i(__lsx_vld(lookup_tbl, 16)); - base64_tbl2 = ____m256i(__lsx_vld(lookup_tbl, 32)); - base64_tbl3 = ____m256i(__lsx_vld(lookup_tbl, 48)); + if (srclen >= 16) { + shuf = v16u8{1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10}; + v_fc0fc00 = __lsx_vreplgr2vr_w(uint32_t(0x0fc0fc00)); + v_3f03f0 = __lsx_vreplgr2vr_w(uint32_t(0x003f03f0)); + shift_r = __lsx_vreplgr2vr_w(uint32_t(0x0006000a)); + shift_l = __lsx_vreplgr2vr_w(uint32_t(0x00080004)); + base64_tbl0 = __lsx_vld(lookup_tbl, 0); + base64_tbl1 = __lsx_vld(lookup_tbl, 16); + base64_tbl2 = __lsx_vld(lookup_tbl, 32); + base64_tbl3 = __lsx_vld(lookup_tbl, 48); } + size_t i = 0; - for (; i + 100 <= srclen; i += 96) { - __m128i in0_lo = + for (; i + 52 <= srclen; i += 48) { + __m128i in0 = __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 0); - __m128i in0_hi = + __m128i in1 = __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); - __m128i in1_lo = + __m128i in2 = __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 2); - __m128i in1_hi = + __m128i in3 = __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 3); - __m128i in2_lo = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 4); - __m128i in2_hi = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 5); - __m128i in3_lo = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 6); - __m128i in3_hi = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 7); - - __m256i in0 = lasx_set_q(in0_hi, in0_lo); - __m256i in1 = lasx_set_q(in1_hi, in1_lo); - __m256i in2 = lasx_set_q(in2_hi, in2_lo); - __m256i in3 = lasx_set_q(in3_hi, in3_lo); - in0 = __lasx_xvshuf_b(in0, in0, (__m256i)shuf); - in1 = __lasx_xvshuf_b(in1, in1, (__m256i)shuf); - in2 = __lasx_xvshuf_b(in2, in2, (__m256i)shuf); - in3 = __lasx_xvshuf_b(in3, in3, (__m256i)shuf); + in0 = __lsx_vshuf_b(in0, in0, (__m128i)shuf); + in1 = __lsx_vshuf_b(in1, in1, (__m128i)shuf); + in2 = __lsx_vshuf_b(in2, in2, (__m128i)shuf); + in3 = __lsx_vshuf_b(in3, in3, (__m128i)shuf); - __m256i t0_0 = __lasx_xvand_v(in0, v_fc0fc00); - __m256i t0_1 = __lasx_xvand_v(in1, v_fc0fc00); - __m256i t0_2 = __lasx_xvand_v(in2, v_fc0fc00); - __m256i t0_3 = __lasx_xvand_v(in3, v_fc0fc00); + __m128i t0_0 = __lsx_vand_v(in0, v_fc0fc00); + __m128i t0_1 = __lsx_vand_v(in1, v_fc0fc00); + __m128i t0_2 = __lsx_vand_v(in2, v_fc0fc00); + __m128i t0_3 = __lsx_vand_v(in3, v_fc0fc00); - __m256i t1_0 = __lasx_xvsrl_h(t0_0, shift_r); - __m256i t1_1 = __lasx_xvsrl_h(t0_1, shift_r); - __m256i t1_2 = __lasx_xvsrl_h(t0_2, shift_r); - __m256i t1_3 = __lasx_xvsrl_h(t0_3, shift_r); + __m128i t1_0 = __lsx_vsrl_h(t0_0, shift_r); + __m128i t1_1 = __lsx_vsrl_h(t0_1, shift_r); + __m128i t1_2 = __lsx_vsrl_h(t0_2, shift_r); + __m128i t1_3 = __lsx_vsrl_h(t0_3, shift_r); - __m256i t2_0 = __lasx_xvand_v(in0, v_3f03f0); - __m256i t2_1 = __lasx_xvand_v(in1, v_3f03f0); - __m256i t2_2 = __lasx_xvand_v(in2, v_3f03f0); - __m256i t2_3 = __lasx_xvand_v(in3, v_3f03f0); + __m128i t2_0 = __lsx_vand_v(in0, v_3f03f0); + __m128i t2_1 = __lsx_vand_v(in1, v_3f03f0); + __m128i t2_2 = __lsx_vand_v(in2, v_3f03f0); + __m128i t2_3 = __lsx_vand_v(in3, v_3f03f0); - __m256i t3_0 = __lasx_xvsll_h(t2_0, shift_l); - __m256i t3_1 = __lasx_xvsll_h(t2_1, shift_l); - __m256i t3_2 = __lasx_xvsll_h(t2_2, shift_l); - __m256i t3_3 = __lasx_xvsll_h(t2_3, shift_l); + __m128i t3_0 = __lsx_vsll_h(t2_0, shift_l); + __m128i t3_1 = __lsx_vsll_h(t2_1, shift_l); + __m128i t3_2 = __lsx_vsll_h(t2_2, shift_l); + __m128i t3_3 = __lsx_vsll_h(t2_3, shift_l); - __m256i input0 = __lasx_xvor_v(t1_0, t3_0); - __m256i input0_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input0); - __m256i input0_shuf1 = __lasx_xvshuf_b( - base64_tbl3, base64_tbl2, __lasx_xvsub_b(input0, __lasx_xvldi(32))); - __m256i input0_mask = __lasx_xvslei_bu(input0, 31); - __m256i input0_result = - __lasx_xvbitsel_v(input0_shuf1, input0_shuf0, input0_mask); - __lasx_xvst(input0_result, reinterpret_cast<__m256i *>(out), 0); - out += 32; + __m128i input0 = __lsx_vor_v(t1_0, t3_0); + __m128i input0_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input0); + __m128i input0_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input0, __lsx_vldi(32))); + __m128i input0_mask = __lsx_vslei_bu(input0, 31); + __m128i input0_result = + __lsx_vbitsel_v(input0_shuf1, input0_shuf0, input0_mask); + __lsx_vst(input0_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; - __m256i input1 = __lasx_xvor_v(t1_1, t3_1); - __m256i input1_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input1); - __m256i input1_shuf1 = __lasx_xvshuf_b( - base64_tbl3, base64_tbl2, __lasx_xvsub_b(input1, __lasx_xvldi(32))); - __m256i input1_mask = __lasx_xvslei_bu(input1, 31); - __m256i input1_result = - __lasx_xvbitsel_v(input1_shuf1, input1_shuf0, input1_mask); - __lasx_xvst(input1_result, reinterpret_cast<__m256i *>(out), 0); - out += 32; + __m128i input1 = __lsx_vor_v(t1_1, t3_1); + __m128i input1_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input1); + __m128i input1_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input1, __lsx_vldi(32))); + __m128i input1_mask = __lsx_vslei_bu(input1, 31); + __m128i input1_result = + __lsx_vbitsel_v(input1_shuf1, input1_shuf0, input1_mask); + __lsx_vst(input1_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; - __m256i input2 = __lasx_xvor_v(t1_2, t3_2); - __m256i input2_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input2); - __m256i input2_shuf1 = __lasx_xvshuf_b( - base64_tbl3, base64_tbl2, __lasx_xvsub_b(input2, __lasx_xvldi(32))); - __m256i input2_mask = __lasx_xvslei_bu(input2, 31); - __m256i input2_result = - __lasx_xvbitsel_v(input2_shuf1, input2_shuf0, input2_mask); - __lasx_xvst(input2_result, reinterpret_cast<__m256i *>(out), 0); - out += 32; + __m128i input2 = __lsx_vor_v(t1_2, t3_2); + __m128i input2_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input2); + __m128i input2_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input2, __lsx_vldi(32))); + __m128i input2_mask = __lsx_vslei_bu(input2, 31); + __m128i input2_result = + __lsx_vbitsel_v(input2_shuf1, input2_shuf0, input2_mask); + __lsx_vst(input2_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; - __m256i input3 = __lasx_xvor_v(t1_3, t3_3); - __m256i input3_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input3); - __m256i input3_shuf1 = __lasx_xvshuf_b( - base64_tbl3, base64_tbl2, __lasx_xvsub_b(input3, __lasx_xvldi(32))); - __m256i input3_mask = __lasx_xvslei_bu(input3, 31); - __m256i input3_result = - __lasx_xvbitsel_v(input3_shuf1, input3_shuf0, input3_mask); - __lasx_xvst(input3_result, reinterpret_cast<__m256i *>(out), 0); - out += 32; + __m128i input3 = __lsx_vor_v(t1_3, t3_3); + __m128i input3_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input3); + __m128i input3_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input3, __lsx_vldi(32))); + __m128i input3_mask = __lsx_vslei_bu(input3, 31); + __m128i input3_result = + __lsx_vbitsel_v(input3_shuf1, input3_shuf0, input3_mask); + __lsx_vst(input3_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; } - for (; i + 28 <= srclen; i += 24) { - - __m128i in_lo = __lsx_vld(reinterpret_cast(input + i), 0); - __m128i in_hi = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); + for (; i + 16 <= srclen; i += 12) { - __m256i in = lasx_set_q(in_hi, in_lo); + __m128i in = __lsx_vld(reinterpret_cast(input + i), 0); // bytes from groups A, B and C are needed in separate 32-bit lanes // in = [DDDD|CCCC|BBBB|AAAA] @@ -66826,32 +62050,33 @@ size_t encode_base64(char *dst, const char *src, size_t srclen, // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ // processed bits - in = __lasx_xvshuf_b(in, in, (__m256i)shuf); + in = __lsx_vshuf_b(in, in, (__m128i)shuf); // unpacking // t0 = [0000cccc|cc000000|aaaaaa00|00000000] - __m256i t0 = __lasx_xvand_v(in, v_fc0fc00); + __m128i t0 = __lsx_vand_v(in, v_fc0fc00); // t1 = [00000000|00cccccc|00000000|00aaaaaa] // ((c >> 6), (a >> 10)) - __m256i t1 = __lasx_xvsrl_h(t0, shift_r); + __m128i t1 = __lsx_vsrl_h(t0, shift_r); // t2 = [00000000|00dddddd|000000bb|bbbb0000] - __m256i t2 = __lasx_xvand_v(in, v_3f03f0); + __m128i t2 = __lsx_vand_v(in, v_3f03f0); // t3 = [00dddddd|00000000|00bbbbbb|00000000] // ((d << 8), (b << 4)) - __m256i t3 = __lasx_xvsll_h(t2, shift_l); + __m128i t3 = __lsx_vsll_h(t2, shift_l); // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 - __m256i indices = __lasx_xvor_v(t1, t3); + __m128i indices = __lsx_vor_v(t1, t3); - __m256i indices_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, indices); - __m256i indices_shuf1 = __lasx_xvshuf_b( - base64_tbl3, base64_tbl2, __lasx_xvsub_b(indices, __lasx_xvldi(32))); - __m256i indices_mask = __lasx_xvslei_bu(indices, 31); - __m256i indices_result = - __lasx_xvbitsel_v(indices_shuf1, indices_shuf0, indices_mask); - __lasx_xvst(indices_result, reinterpret_cast<__m256i *>(out), 0); - out += 32; + __m128i indices_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, indices); + __m128i indices_shuf1 = __lsx_vshuf_b( + base64_tbl3, base64_tbl2, __lsx_vsub_b(indices, __lsx_vldi(32))); + __m128i indices_mask = __lsx_vslei_bu(indices, 31); + __m128i indices_result = + __lsx_vbitsel_v(indices_shuf1, indices_shuf0, indices_mask); + + __lsx_vst(indices_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; } return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, @@ -66875,7 +62100,7 @@ static inline void compress(__m128i data, uint16_t mask, char *output) { tables::base64::thintable_epi8[mask2]}; // we increment by 0x08 the second half of the mask - const v4u32 hi = {0, 0, 0x08080808, 0x08080808}; + v4u32 hi = {0, 0, 0x08080808, 0x08080808}; __m128i shufmask1 = __lsx_vadd_b((__m128i)shufmask, (__m128i)hi); // this is the version "nearly pruned" @@ -66897,111 +62122,115 @@ static inline void compress(__m128i data, uint16_t mask, char *output) { } struct block64 { - __m256i chunks[2]; + __m128i chunks[4]; }; template -static inline uint32_t to_base64_mask(__m256i *src, bool *error) { - __m256i ascii_space_tbl = - ____m256i((__m128i)v16u8{0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0}); +static inline uint16_t to_base64_mask(__m128i *src, bool *error) { + const v16u8 ascii_space_tbl = {0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0}; // credit: aqrit - __m256i delta_asso; + /* + '0'(0x30)-'9'(0x39) => delta_values_index = 4 + 'A'(0x41)-'Z'(0x5a) => delta_values_index = 4/5/12(4+8) + 'a'(0x61)-'z'(0x7a) => delta_values_index = 6/7/14(6+8) + '+'(0x2b) => delta_values_index = 3 + '/'(0x2f) => delta_values_index = 2+8 = 10 + '-'(0x2d) => delta_values_index = 2+8 = 10 + '_'(0x5f) => delta_values_index = 5+8 = 13 + */ + v16u8 delta_asso; if (default_or_url) { - delta_asso = - ____m256i((__m128i)v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x11, 0x0, 0x16}); + delta_asso = v16u8{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x16}; } else { - delta_asso = - ____m256i((__m128i)v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, - 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF}); + delta_asso = v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x0, 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF}; } - __m256i delta_values; + v16i8 delta_values; if (default_or_url) { - delta_values = ____m256i( - (__m128i)v16i8{int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0xFF), int8_t(0x11), - int8_t(0xFF), int8_t(0xBF), int8_t(0x10), int8_t(0xB9)}); + delta_values = + v16i8{int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0xFF), int8_t(0x11), + int8_t(0xFF), int8_t(0xBF), int8_t(0x10), int8_t(0xB9)}; } else if (base64_url) { - delta_values = ____m256i( - (__m128i)v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x11), int8_t(0xC3), - int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0xB9)}); + delta_values = + v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x11), int8_t(0xC3), + int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0xB9)}; } else { - delta_values = ____m256i( - (__m128i)v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), - int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)}); + delta_values = + v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)}; } - __m256i check_asso; + v16u8 check_asso; if (default_or_url) { - check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, - 0x0B, 0x0E, 0x0B, 0x06}); - + check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0E, 0x0B, 0x06}; } else if (base64_url) { - check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, - 0x0B, 0x06, 0x0B, 0x12}); + check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x06, 0x0B, 0x12}; } else { - check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, - 0x0B, 0x0B, 0x0B, 0x0F}); + check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F}; } - __m256i check_values; + v16i8 check_values; if (default_or_url) { - - check_values = ____m256i( - (__m128i)v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), - int8_t(0xB5), int8_t(0xA1), int8_t(0x00), int8_t(0x80), - int8_t(0x00), int8_t(0x80), int8_t(0x00), int8_t(0x80)}); + check_values = + v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0xA1), int8_t(0x00), int8_t(0x80), + int8_t(0x00), int8_t(0x80), int8_t(0x00), int8_t(0x80)}; } else if (base64_url) { - check_values = ____m256i( - (__m128i)v16i8{int8_t(0x0), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD3), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD0), int8_t(0x80), - int8_t(0xB0), int8_t(0x80), int8_t(0x0), int8_t(0x0)}); + check_values = v16i8{int8_t(0x0), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD3), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD0), int8_t(0x80), + int8_t(0xB0), int8_t(0x80), int8_t(0x0), int8_t(0x0)}; } else { - check_values = ____m256i( - (__m128i)v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), - int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)}); + check_values = + v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), + int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)}; } - __m256i shifted = __lasx_xvsrli_b(*src, 3); - __m256i asso_index = __lasx_xvand_v(*src, __lasx_xvldi(0xF)); - __m256i delta_hash = __lasx_xvavgr_bu( - __lasx_xvshuf_b(delta_asso, delta_asso, asso_index), shifted); - __m256i check_hash = __lasx_xvavgr_bu( - __lasx_xvshuf_b(check_asso, check_asso, asso_index), shifted); + const __m128i shifted = __lsx_vsrli_b(*src, 3); + __m128i asso_index = __lsx_vand_v(*src, __lsx_vldi(0xF)); + const __m128i delta_hash = + __lsx_vavgr_bu(__lsx_vshuf_b((__m128i)delta_asso, (__m128i)delta_asso, + (__m128i)asso_index), + shifted); + const __m128i check_hash = + __lsx_vavgr_bu(__lsx_vshuf_b((__m128i)check_asso, (__m128i)check_asso, + (__m128i)asso_index), + shifted); - __m256i out = __lasx_xvsadd_b( - __lasx_xvshuf_b(delta_values, delta_values, delta_hash), *src); - __m256i chk = __lasx_xvsadd_b( - __lasx_xvshuf_b(check_values, check_values, check_hash), *src); - __m256i chk_ltz = __lasx_xvmskltz_b(chk); - unsigned int mask = __lasx_xvpickve2gr_wu(chk_ltz, 0); - mask = mask | (__lsx_vpickve2gr_hu(lasx_extracti128_hi(chk_ltz), 0) << 16); + const __m128i out = + __lsx_vsadd_b(__lsx_vshuf_b((__m128i)delta_values, (__m128i)delta_values, + (__m128i)delta_hash), + *src); + const __m128i chk = + __lsx_vsadd_b(__lsx_vshuf_b((__m128i)check_values, (__m128i)check_values, + (__m128i)check_hash), + *src); + unsigned int mask = __lsx_vpickve2gr_hu(__lsx_vmskltz_b(chk), 0); if (mask) { - __m256i ascii_space = __lasx_xvseq_b( - __lasx_xvshuf_b(ascii_space_tbl, ascii_space_tbl, asso_index), *src); - __m256i ascii_space_ltz = __lasx_xvmskltz_b(ascii_space); - unsigned int ascii_space_mask = __lasx_xvpickve2gr_wu(ascii_space_ltz, 0); - ascii_space_mask = - ascii_space_mask | - (__lsx_vpickve2gr_hu(lasx_extracti128_hi(ascii_space_ltz), 0) << 16); - *error |= (mask != ascii_space_mask); + __m128i ascii_space = __lsx_vseq_b(__lsx_vshuf_b((__m128i)ascii_space_tbl, + (__m128i)ascii_space_tbl, + (__m128i)asso_index), + *src); + *error |= + (mask != __lsx_vpickve2gr_hu(__lsx_vmskltz_b((__m128i)ascii_space), 0)); } *src = out; - return (uint32_t)mask; + return (uint16_t)mask; } template @@ -67011,12 +62240,18 @@ static inline uint64_t to_base64_mask(block64 *b, bool *error) { to_base64_mask(&b->chunks[0], error); uint64_t m1 = to_base64_mask(&b->chunks[1], error); - return m0 | (m1 << 32); + uint64_t m2 = + to_base64_mask(&b->chunks[2], error); + uint64_t m3 = + to_base64_mask(&b->chunks[3], error); + return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); } static inline void copy_block(block64 *b, char *output) { - __lasx_xvst(b->chunks[0], reinterpret_cast<__m256i *>(output), 0); - __lasx_xvst(b->chunks[1], reinterpret_cast<__m256i *>(output), 32); + __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output), 0); + __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output), 16); + __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output), 32); + __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output), 48); } static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { @@ -67024,80 +62259,152 @@ static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { uint64_t count = __lsx_vpickve2gr_d(__lsx_vpcnt_h(__lsx_vreplgr2vr_d(nmask)), 0); uint16_t *count_ptr = (uint16_t *)&count; - compress(lasx_extracti128_lo(b->chunks[0]), uint16_t(mask), output); - compress(lasx_extracti128_hi(b->chunks[0]), uint16_t(mask >> 16), - output + count_ptr[0]); - compress(lasx_extracti128_lo(b->chunks[1]), uint16_t(mask >> 32), + compress(b->chunks[0], uint16_t(mask), output); + compress(b->chunks[1], uint16_t(mask >> 16), output + count_ptr[0]); + compress(b->chunks[2], uint16_t(mask >> 32), output + count_ptr[0] + count_ptr[1]); - compress(lasx_extracti128_hi(b->chunks[1]), uint16_t(mask >> 48), + compress(b->chunks[3], uint16_t(mask >> 48), output + count_ptr[0] + count_ptr[1] + count_ptr[2]); return count_ones(nmask); } +template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } + +inline size_t compress_block_single(block64 *b, uint64_t mask, char *output) { + const size_t pos64 = trailing_zeroes(mask); + const int8_t pos = pos64 & 0xf; + // Predefine the index vector + const v16u8 v1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + + switch (pos64 >> 4) { + case 0b00: { + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); // v1 > v0 + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(b->chunks[0], b->chunks[0], sh); + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 0 * 16), 0); + __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output + 1 * 16 - 1), + 0); + __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output + 2 * 16 - 1), + 0); + __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output + 3 * 16 - 1), + 0); + } break; + + case 0b01: { + __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output + 0 * 16), 0); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(b->chunks[1], b->chunks[1], sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 1 * 16), 0); + __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output + 2 * 16 - 1), + 0); + __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output + 3 * 16 - 1), + 0); + } break; + + case 0b10: { + __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output + 0 * 16), 0); + __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output + 1 * 16), 0); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(b->chunks[2], b->chunks[2], sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 2 * 16), 0); + __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output + 3 * 16 - 1), + 0); + } break; + + case 0b11: { + __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output + 0 * 16), 0); + __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output + 1 * 16), 0); + __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output + 2 * 16), 0); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(b->chunks[3], b->chunks[3], sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 3 * 16), 0); + } break; + } + return 63; +} + // The caller of this function is responsible to ensure that there are 64 bytes // available from reading at src. The data is read into a block64 structure. static inline void load_block(block64 *b, const char *src) { - b->chunks[0] = __lasx_xvld(reinterpret_cast(src), 0); - b->chunks[1] = __lasx_xvld(reinterpret_cast(src), 32); + b->chunks[0] = __lsx_vld(reinterpret_cast(src), 0); + b->chunks[1] = __lsx_vld(reinterpret_cast(src), 16); + b->chunks[2] = __lsx_vld(reinterpret_cast(src), 32); + b->chunks[3] = __lsx_vld(reinterpret_cast(src), 48); } // The caller of this function is responsible to ensure that there are 128 bytes // available from reading at src. The data is read into a block64 structure. static inline void load_block(block64 *b, const char16_t *src) { - __m256i m1 = __lasx_xvld(reinterpret_cast(src), 0); - __m256i m2 = __lasx_xvld(reinterpret_cast(src), 32); - __m256i m3 = __lasx_xvld(reinterpret_cast(src), 64); - __m256i m4 = __lasx_xvld(reinterpret_cast(src), 96); - b->chunks[0] = __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(m2, m1, 0), 0b11011000); - b->chunks[1] = __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(m4, m3, 0), 0b11011000); + __m128i m1 = __lsx_vld(reinterpret_cast(src), 0); + __m128i m2 = __lsx_vld(reinterpret_cast(src), 16); + __m128i m3 = __lsx_vld(reinterpret_cast(src), 32); + __m128i m4 = __lsx_vld(reinterpret_cast(src), 48); + __m128i m5 = __lsx_vld(reinterpret_cast(src), 64); + __m128i m6 = __lsx_vld(reinterpret_cast(src), 80); + __m128i m7 = __lsx_vld(reinterpret_cast(src), 96); + __m128i m8 = __lsx_vld(reinterpret_cast(src), 112); + b->chunks[0] = __lsx_vssrlni_bu_h(m2, m1, 0); + b->chunks[1] = __lsx_vssrlni_bu_h(m4, m3, 0); + b->chunks[2] = __lsx_vssrlni_bu_h(m6, m5, 0); + b->chunks[3] = __lsx_vssrlni_bu_h(m8, m7, 0); } -static inline void base64_decode(char *out, __m256i str) { - __m256i t0 = __lasx_xvor_v( - __lasx_xvslli_w(str, 26), - __lasx_xvslli_w(__lasx_xvand_v(str, lasx_splat_u32(0x0000ff00)), 12)); - __m256i t1 = - __lasx_xvsrli_w(__lasx_xvand_v(str, lasx_splat_u32(0x003f0000)), 2); - __m256i t2 = __lasx_xvor_v(t0, t1); - __m256i t3 = __lasx_xvor_v(t2, __lasx_xvsrli_w(str, 16)); - __m256i pack_shuffle = ____m256i( - (__m128i)v16u8{3, 2, 1, 7, 6, 5, 11, 10, 9, 15, 14, 13, 0, 0, 0, 0}); - t3 = __lasx_xvshuf_b(t3, t3, (__m256i)pack_shuffle); +static inline void base64_decode(char *out, __m128i str) { + __m128i t0 = __lsx_vor_v( + __lsx_vslli_w(str, 26), + __lsx_vslli_w(__lsx_vand_v(str, lsx_splat_u32(0x0000FF00)), 12)); + __m128i t1 = __lsx_vsrli_w(__lsx_vand_v(str, lsx_splat_u32(0x003F0000)), 2); + __m128i t2 = __lsx_vor_v(t0, t1); + __m128i t3 = __lsx_vor_v(t2, __lsx_vsrli_w(str, 16)); + const v16u8 pack_shuffle = {3, 2, 1, 7, 6, 5, 11, 10, + 9, 15, 14, 13, 0, 0, 0, 0}; + t3 = __lsx_vshuf_b(t3, t3, (__m128i)pack_shuffle); // Store the output: - __lsx_vst(lasx_extracti128_lo(t3), out, 0); - __lsx_vst(lasx_extracti128_hi(t3), out, 12); + // we only need 12. + __lsx_vstelm_d(t3, out, 0, 0); + __lsx_vstelm_w(t3, out + 8, 0, 2); } // decode 64 bytes and output 48 bytes static inline void base64_decode_block(char *out, const char *src) { - base64_decode(out, __lasx_xvld(reinterpret_cast(src), 0)); + base64_decode(out, __lsx_vld(reinterpret_cast(src), 0)); + base64_decode(out + 12, + __lsx_vld(reinterpret_cast(src), 16)); base64_decode(out + 24, - __lasx_xvld(reinterpret_cast(src), 32)); + __lsx_vld(reinterpret_cast(src), 32)); + base64_decode(out + 36, + __lsx_vld(reinterpret_cast(src), 48)); } - static inline void base64_decode_block_safe(char *out, const char *src) { - base64_decode(out, __lasx_xvld(reinterpret_cast(src), 0)); - alignas(32) char buffer[32]; - base64_decode(buffer, - __lasx_xvld(reinterpret_cast(src), 32)); - std::memcpy(out + 24, buffer, 24); + base64_decode_block(out, src); } - static inline void base64_decode_block(char *out, block64 *b) { base64_decode(out, b->chunks[0]); - base64_decode(out + 24, b->chunks[1]); + base64_decode(out + 12, b->chunks[1]); + base64_decode(out + 24, b->chunks[2]); + base64_decode(out + 36, b->chunks[3]); } static inline void base64_decode_block_safe(char *out, block64 *b) { - base64_decode(out, b->chunks[0]); - alignas(32) char buffer[32]; - base64_decode(buffer, b->chunks[1]); - std::memcpy(out + 24, buffer, 24); + base64_decode_block(out, b); } template + typename char_type> full_result -compress_decode_base64(char *dst, const chartype *src, size_t srclen, +compress_decode_base64(char *dst, const char_type *src, size_t srclen, base64_options options, last_chunk_handling_options last_chunk_options) { const uint8_t *to_base64 = @@ -67115,19 +62422,15 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } return {SUCCESS, full_input_length, 0}; } - char *end_of_safe_64byte_zone = - (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; - - const chartype *const srcinit = src; + const char_type *const srcinit = src; const char *const dstinit = dst; - const chartype *const srcend = src + srclen; + const char_type *const srcend = src + srclen; - constexpr size_t block_size = 6; - static_assert(block_size >= 2, "block_size must be at least two"); + constexpr size_t block_size = 10; char buffer[block_size * 64]; char *bufferptr = buffer; if (srclen >= 64) { - const chartype *const srcend64 = src + srclen - 64; + const char_type *const srcend64 = src + srclen - 64; while (src <= srcend64) { block64 b; load_block(&b, src); @@ -67135,55 +62438,49 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, bool error = false; uint64_t badcharmask = to_base64_mask(&b, &error); - if (error && !ignore_garbage) { - src -= 64; - while (src < srcend && scalar::base64::is_eight_byte(*src) && - to_base64[uint8_t(*src)] <= 64) { - src++; + if (badcharmask) { + if (error && !ignore_garbage) { + src -= 64; + while (src < srcend && scalar::base64::is_eight_byte(*src) && + to_base64[uint8_t(*src)] <= 64) { + src++; + } + if (src < srcend) { + // should never happen + } + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; } + if (badcharmask != 0) { - // optimization opportunity: check for simple masks like those made of - // continuous 1s followed by continuous 0s. And masks containing a - // single bad character. - bufferptr += compress_block(&b, badcharmask, bufferptr); - } else if (bufferptr != buffer) { - copy_block(&b, bufferptr); - bufferptr += 64; - } else { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, &b); + if (is_power_of_two(badcharmask)) { + bufferptr += compress_block_single(&b, badcharmask, bufferptr); } else { - base64_decode_block(dst, &b); + bufferptr += compress_block(&b, badcharmask, bufferptr); } - dst += 48; + } else { + // optimization opportunity: if bufferptr == buffer and mask == 0, we + // can avoid the call to compress_block and decode directly. + copy_block(&b, bufferptr); + bufferptr += 64; } if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 2); i++) { + for (size_t i = 0; i < (block_size - 1); i++) { base64_decode_block(dst, buffer + i * 64); dst += 48; } - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); - } else { - base64_decode_block(dst, buffer + (block_size - 2) * 64); - } - dst += 48; std::memcpy(buffer, buffer + (block_size - 1) * 64, 64); // 64 might be too much bufferptr -= (block_size - 1) * 64; } } } - char *buffer_start = buffer; // Optimization note: if this is almost full, then it is worth our // time, otherwise, we should just decode directly. int last_block = (int)((bufferptr - buffer_start) % 64); if (last_block != 0 && srcend - src + last_block >= 64) { - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { uint8_t val = to_base64[uint8_t(*src)]; *bufferptr = char(val); @@ -67198,11 +62495,7 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer_start); - } else { - base64_decode_block(dst, buffer_start); - } + base64_decode_block(dst, buffer_start); dst += 48; } if ((bufferptr - buffer_start) % 64 != 0) { @@ -67212,7 +62505,7 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) << 8; - // lasx is little-endian + // lsx is little-endian triple = scalar::u32_swap_bytes(triple); std::memcpy(dst, &triple, 4); @@ -67225,7 +62518,7 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) << 8; - // lasx is little-endian + // lsx is little-endian triple = scalar::u32_swap_bytes(triple); std::memcpy(dst, &triple, 3); @@ -67285,24 +62578,22 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } return {SUCCESS, srclen, size_t(dst - dstinit)}; } -/* end file src/lasx/lasx_base64.cpp */ -/* begin file src/lasx/lasx_find.cpp */ +/* end file src/lsx/lsx_base64.cpp */ +/* begin file src/lsx/lsx_find.cpp */ simdutf_really_inline const char *util_find(const char *start, const char *end, char character) noexcept { if (start >= end) return end; - const int step = 32; - __m256i char_vec = __lasx_xvreplgr2vr_b(static_cast(character)); + const int step = 16; + __m128i char_vec = __lsx_vreplgr2vr_b(static_cast(character)); while (end - start >= step) { - __m256i data = __lasx_xvld(reinterpret_cast(start), 0); - __m256i cmp = __lasx_xvseq_b(data, char_vec); - if (__lasx_xbnz_v(cmp)) { - __m256i res = __lasx_xvmsknz_b(cmp); - uint32_t mask0 = __lasx_xvpickve2gr_wu(res, 0); - uint32_t mask1 = __lasx_xvpickve2gr_wu(res, 4); - uint32_t mask = (mask0 | (mask1 << 16)); + __m128i data = __lsx_vld(reinterpret_cast(start), 0); + __m128i cmp = __lsx_vseq_b(data, char_vec); + if (__lsx_bnz_v(cmp)) { + uint16_t mask = + static_cast(__lsx_vpickve2gr_hu(__lsx_vmsknz_b(cmp), 0)); return start + trailing_zeroes(mask); } @@ -67325,17 +62616,15 @@ simdutf_really_inline const char16_t *util_find(const char16_t *start, if (start >= end) return end; - const int step = 16; - __m256i char_vec = __lasx_xvreplgr2vr_h(static_cast(character)); + const int step = 8; + __m128i char_vec = __lsx_vreplgr2vr_h(static_cast(character)); while (end - start >= step) { - __m256i data = __lasx_xvld(reinterpret_cast(start), 0); - __m256i cmp = __lasx_xvseq_h(data, char_vec); - if (__lasx_xbnz_v(cmp)) { - __m256i res = __lasx_xvmsknz_b(cmp); - uint32_t mask0 = __lasx_xvpickve2gr_wu(res, 0); - uint32_t mask1 = __lasx_xvpickve2gr_wu(res, 4); - uint32_t mask = (mask0 | (mask1 << 16)); + __m128i data = __lsx_vld(reinterpret_cast(start), 0); + __m128i cmp = __lsx_vseq_h(data, char_vec); + if (__lsx_bnz_v(cmp)) { + uint16_t mask = + static_cast(__lsx_vpickve2gr_hu(__lsx_vmsknz_b(cmp), 0)); return start + trailing_zeroes(mask) / 2; } @@ -67351,16 +62640,16 @@ simdutf_really_inline const char16_t *util_find(const char16_t *start, return end; } -/* end file src/lasx/lasx_find.cpp */ +/* end file src/lsx/lsx_find.cpp */ #endif // SIMDUTF_FEATURE_BASE64 } // namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* begin file src/generic/buf_block_reader.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { // Walks through a buffer in block-sized increments, loading the last part with @@ -67466,13 +62755,13 @@ simdutf_really_inline void buf_block_reader::advance() { } } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/buf_block_reader.h */ #if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf8_validation { @@ -67692,12 +62981,12 @@ struct utf8_checker { using utf8_validation::utf8_checker; } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ /* begin file src/generic/utf8_validation/utf8_validator.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf8_validation { @@ -67778,14 +63067,14 @@ result generic_validate_utf8_with_errors(const char *input, size_t length) { } // namespace utf8_validation } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf8_validation/utf8_validator.h */ #endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING #if SIMDUTF_FEATURE_ASCII /* begin file src/generic/ascii_validation.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace ascii_validation { @@ -67832,7 +63121,7 @@ bool generic_validate_ascii(const char *input, size_t length) { } // namespace ascii_validation } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/ascii_validation.h */ #endif // SIMDUTF_FEATURE_ASCII @@ -67841,7 +63130,7 @@ bool generic_validate_ascii(const char *input, size_t length) { // transcoding from UTF-8 to Latin 1 /* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf8_to_latin1 { using namespace simd; @@ -68154,12 +63443,12 @@ struct validating_transcoder { }; // struct utf8_checker } // namespace utf8_to_latin1 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ /* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf8_to_latin1 { using namespace simd; @@ -68233,16 +63522,17 @@ simdutf_really_inline size_t convert_valid(const char *in, size_t size, } // namespace utf8_to_latin1 } // namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf // namespace simdutf /* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 // transcoding from UTF-8 to UTF-16 /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf8_to_utf16 { @@ -68313,12 +63603,12 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace utf8_to_utf16 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf8_to_utf16 { using namespace simd; @@ -68647,12 +63937,12 @@ struct validating_transcoder { }; // struct utf8_checker } // namespace utf8_to_utf16 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ /* begin file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf8 { @@ -68702,15 +63992,16 @@ simdutf_really_inline size_t utf16_length_from_utf8_bytemask(const char *in, } // namespace utf8 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 // transcoding from UTF-8 to UTF-32 /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf8_to_utf32 { @@ -68749,12 +64040,12 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace utf8_to_utf32 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf8_to_utf32 { using namespace simd; @@ -69069,7 +64360,7 @@ struct validating_transcoder { }; // struct utf8_checker } // namespace utf8_to_utf32 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 @@ -69077,7 +64368,7 @@ struct validating_transcoder { #if SIMDUTF_FEATURE_UTF8 /* begin file src/generic/utf8.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf8 { @@ -69166,7 +64457,7 @@ simdutf_really_inline size_t utf16_length_from_utf8(const char *in, } // namespace utf8 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf8.h */ #endif // SIMDUTF_FEATURE_UTF8 @@ -69174,7 +64465,7 @@ simdutf_really_inline size_t utf16_length_from_utf8(const char *in, #if SIMDUTF_FEATURE_UTF16 /* begin file src/generic/utf16/count_code_points_bytemask.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf16 { @@ -69189,11 +64480,11 @@ simdutf_really_inline size_t count_code_points(const char16_t *in, size_t pos = 0; size_t count = 0; - constexpr size_t max_itertions = 65535; + constexpr size_t max_iterations = 65535; const auto one = vector_u16::splat(1); const auto zero = vector_u16::zero(); - size_t itertion = 0; + size_t iteration = 0; auto counters = zero; for (; pos < size / N * N; pos += N) { @@ -69211,15 +64502,15 @@ simdutf_really_inline size_t count_code_points(const char16_t *in, counters += t2; - itertion += 1; - if (itertion == max_itertions) { + iteration += 1; + if (iteration == max_iterations) { count += counters.sum(); counters = zero; - itertion = 0; + iteration = 0; } } - if (itertion > 0) { + if (iteration > 0) { count += counters.sum(); } @@ -69229,12 +64520,12 @@ simdutf_really_inline size_t count_code_points(const char16_t *in, } // namespace utf16 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf16/count_code_points_bytemask.h */ /* begin file src/generic/utf16/change_endianness.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf16 { @@ -69255,12 +64546,12 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { } // namespace utf16 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf16/change_endianness.h */ /* begin file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf16 { @@ -69456,12 +64747,12 @@ utf8_length_from_utf16_with_replacement(const char16_t *in, size_t size) { } // namespace utf16 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ /* begin file src/generic/utf16/utf32_length_from_utf16.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf16 { @@ -69473,12 +64764,12 @@ simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, } // namespace utf16 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf16/utf32_length_from_utf16.h */ /* begin file src/generic/utf16/to_well_formed.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf16 { @@ -69568,7 +64859,7 @@ void to_well_formed(const char16_t *in, size_t n, char16_t *out) { } // namespace utf16 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf16/to_well_formed.h */ #endif // SIMDUTF_FEATURE_UTF16 @@ -69576,7 +64867,7 @@ void to_well_formed(const char16_t *in, size_t n, char16_t *out) { #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/generic/validate_utf16.h */ namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf16 { /* @@ -69737,7 +65028,7 @@ const result validate_utf16_as_ascii_with_errors(const char16_t *input, } // namespace utf16 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/validate_utf16.h */ #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING @@ -69747,7 +65038,7 @@ const result validate_utf16_as_ascii_with_errors(const char16_t *input, #include namespace simdutf { -namespace lasx { +namespace lsx { namespace { namespace utf32 { @@ -69878,7 +65169,7 @@ simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, } // namespace utf32 } // unnamed namespace -} // namespace lasx +} // namespace lsx } // namespace simdutf /* end file src/generic/utf32.h */ #endif // SIMDUTF_FEATURE_UTF32 @@ -69887,7 +65178,7 @@ simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, // Implementation-specific overrides // namespace simdutf { -namespace lasx { +namespace lsx { #if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int @@ -69921,33 +65212,33 @@ implementation::detect_encodings(const char *input, #if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return lasx::utf8_validation::generic_validate_utf8(buf, len); + return lsx::utf8_validation::generic_validate_utf8(buf, len); } #endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING #if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { - return lasx::utf8_validation::generic_validate_utf8_with_errors(buf, len); + return lsx::utf8_validation::generic_validate_utf8_with_errors(buf, len); } #endif // SIMDUTF_FEATURE_UTF8 #if SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return lasx::ascii_validation::generic_validate_ascii(buf, len); + return lsx::ascii_validation::generic_validate_ascii(buf, len); } simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { - return lasx::ascii_validation::generic_validate_ascii_with_errors(buf, len); + return lsx::ascii_validation::generic_validate_ascii_with_errors(buf, len); } #endif // SIMDUTF_FEATURE_ASCII #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept { - return lasx::utf16::validate_utf16_as_ascii_with_errors( + return lsx::utf16::validate_utf16_as_ascii_with_errors( buf, len) .error == SUCCESS; } @@ -69955,8 +65246,8 @@ implementation::validate_utf16le_as_ascii(const char16_t *buf, simdutf_warn_unused bool implementation::validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept { - return lasx::utf16::validate_utf16_as_ascii_with_errors(buf, - len) + return lsx::utf16::validate_utf16_as_ascii_with_errors(buf, + len) .error == SUCCESS; } #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII @@ -69969,7 +65260,8 @@ implementation::validate_utf16le(const char16_t *buf, return true; } const auto res = - lasx::utf16::validate_utf16_with_errors(buf, len); + lsx::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { return false; } @@ -69991,9 +65283,9 @@ implementation::validate_utf16be(const char16_t *buf, // empty input is valid. protected the implementation from nullptr. return true; } - const auto res = - lasx::utf16::validate_utf16_with_errors(buf, len); + lsx::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { return false; } @@ -70012,7 +65304,7 @@ simdutf_warn_unused result implementation::validate_utf16le_with_errors( return result(error_code::SUCCESS, 0); } const result res = - lasx::utf16::validate_utf16_with_errors(buf, len); + lsx::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { const result scalar_res = scalar::utf16::validate_with_errors( @@ -70029,7 +65321,7 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( return result(error_code::SUCCESS, 0); } const result res = - lasx::utf16::validate_utf16_with_errors(buf, len); + lsx::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { const result scalar_res = scalar::utf16::validate_with_errors(buf + res.count, @@ -70042,12 +65334,12 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, char16_t *output) const noexcept { - return utf16::to_well_formed(input, len, output); + utf16::to_well_formed(input, len, output); } void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept { - return utf16::to_well_formed(input, len, output); + utf16::to_well_formed(input, len, output); } #endif // SIMDUTF_FEATURE_UTF16 @@ -70058,7 +65350,7 @@ implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { // empty input is valid. protected the implementation from nullptr. return true; } - const char32_t *tail = lasx_validate_utf32le(buf, len); + const char32_t *tail = lsx_validate_utf32le(buf, len); if (tail) { return scalar::utf32::validate(tail, len - (tail - buf)); } else { @@ -70073,7 +65365,7 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( if (simdutf_unlikely(len == 0)) { return result(error_code::SUCCESS, 0); } - result res = lasx_validate_utf32le_with_errors(buf, len); + result res = lsx_validate_utf32le_with_errors(buf, len); if (res.count != len) { result scalar_res = scalar::utf32::validate_with_errors(buf + res.count, len - res.count); @@ -70088,7 +65380,7 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = - lasx_convert_latin1_to_utf8(buf, len, utf8_output); + lsx_convert_latin1_to_utf8(buf, len, utf8_output); size_t converted_chars = ret.second - utf8_output; if (ret.first != buf + len) { @@ -70104,7 +65396,7 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - lasx_convert_latin1_to_utf16le(buf, len, utf16_output); + lsx_convert_latin1_to_utf16le(buf, len, utf16_output); size_t converted_chars = ret.second - utf16_output; if (ret.first != buf + len) { const size_t scalar_converted_chars = @@ -70118,7 +65410,7 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - lasx_convert_latin1_to_utf16be(buf, len, utf16_output); + lsx_convert_latin1_to_utf16be(buf, len, utf16_output); size_t converted_chars = ret.second - utf16_output; if (ret.first != buf + len) { const size_t scalar_converted_chars = @@ -70134,7 +65426,7 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - lasx_convert_latin1_to_utf32(buf, len, utf32_output); + lsx_convert_latin1_to_utf32(buf, len, utf32_output); size_t converted_chars = ret.second - utf32_output; if (ret.first != buf + len) { const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( @@ -70148,117 +65440,19 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { - size_t pos = 0; - char *output_start{latin1_output}; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)latin1_output & 0x1F) && pos < len) { - if (buf[pos] & 0x80) { - if (pos + 1 >= len) - return 0; - if ((buf[pos] & 0b11100000) == 0b11000000) { - if ((buf[pos + 1] & 0b11000000) != 0b10000000) - return 0; - uint32_t code_point = - (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0xFF < code_point) { - return 0; - } - *latin1_output++ = char(code_point); - pos += 2; - } else { - return 0; - } - } else { - *latin1_output++ = char(buf[pos]); - pos++; - } - } - size_t convert_size = latin1_output - output_start; - if (pos == len) - return convert_size; utf8_to_latin1::validating_transcoder converter; - size_t convert_result = - converter.convert(buf + pos, len - pos, latin1_output); - return convert_result ? convert_size + convert_result : 0; + return converter.convert(buf, len, latin1_output); } simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( const char *buf, size_t len, char *latin1_output) const noexcept { - size_t pos = 0; - char *output_start{latin1_output}; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)latin1_output & 0x1F) && pos < len) { - if (buf[pos] & 0x80) { - if ((buf[pos] & 0b11100000) == 0b11000000) { - if (pos + 1 >= len) - return result(error_code::TOO_SHORT, pos); - if ((buf[pos + 1] & 0b11000000) != 0b10000000) - return result(error_code::TOO_SHORT, pos); - uint32_t code_point = - (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); - if (code_point < 0x80) - return result(error_code::OVERLONG, pos); - if (0xFF < code_point) - return result(error_code::TOO_LARGE, pos); - *latin1_output++ = char(code_point); - pos += 2; - } else if ((buf[pos] & 0b11110000) == 0b11100000) { - return result(error_code::TOO_LARGE, pos); - } else if ((buf[pos] & 0b11111000) == 0b11110000) { - return result(error_code::TOO_LARGE, pos); - } else { - if ((buf[pos] & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } - return result(error_code::HEADER_BITS, pos); - } - } else { - *latin1_output++ = char(buf[pos]); - pos++; - } - } - size_t convert_size = latin1_output - output_start; - if (pos == len) - return result(error_code::SUCCESS, convert_size); - utf8_to_latin1::validating_transcoder converter; - result res = - converter.convert_with_errors(buf + pos, len - pos, latin1_output); - return res.error ? result(res.error, res.count + pos) - : result(res.error, res.count + convert_size); + return converter.convert_with_errors(buf, len, latin1_output); } simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { - size_t pos = 0; - char *output_start{latin1_output}; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)latin1_output & 0x1F) && pos < len) { - if (buf[pos] & 0x80) { - if (pos + 1 >= len) - break; - if ((buf[pos] & 0b11100000) == 0b11000000) { - if ((buf[pos + 1] & 0b11000000) != 0b10000000) - return 0; - uint32_t code_point = - (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); - *latin1_output++ = char(code_point); - pos += 2; - } else { - return 0; - } - } else { - *latin1_output++ = char(buf[pos]); - pos++; - } - } - size_t convert_size = latin1_output - output_start; - if (pos == len) - return convert_size; - - size_t convert_result = - lasx::utf8_to_latin1::convert_valid(buf + pos, len - pos, latin1_output); - return convert_result ? convert_size + convert_result : 0; + return lsx::utf8_to_latin1::convert_valid(buf, len, latin1_output); } #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 @@ -70324,7 +65518,7 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lasx_convert_utf16_to_latin1(buf, len, latin1_output); + lsx_convert_utf16_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } @@ -70345,7 +65539,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lasx_convert_utf16_to_latin1(buf, len, latin1_output); + lsx_convert_utf16_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } @@ -70367,7 +65561,7 @@ simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lasx_convert_utf16_to_latin1_with_errors( + lsx_convert_utf16_to_latin1_with_errors( buf, len, latin1_output); if (ret.first.error) { return ret.first; @@ -70394,8 +65588,8 @@ simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lasx_convert_utf16_to_latin1_with_errors(buf, len, - latin1_output); + lsx_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -70434,7 +65628,7 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = - lasx_convert_utf16_to_utf8(buf, len, utf8_output); + lsx_convert_utf16_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -70454,7 +65648,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = - lasx_convert_utf16_to_utf8(buf, len, utf8_output); + lsx_convert_utf16_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -70476,8 +65670,8 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lasx_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); + lsx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -70504,8 +65698,8 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lasx_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); + lsx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -70545,7 +65739,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( return 0; } std::pair ret = - lasx_convert_utf32_to_utf8(buf, len, utf8_output); + lsx_convert_utf32_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -70569,7 +65763,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lasx_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + lsx_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_utf8::convert_with_errors( buf + ret.first.count, len - ret.first.count, ret.second); @@ -70591,7 +65785,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - lasx_convert_utf16_to_utf32(buf, len, utf32_output); + lsx_convert_utf16_to_utf32(buf, len, utf32_output); if (ret.first == nullptr) { return 0; } @@ -70611,7 +65805,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - lasx_convert_utf16_to_utf32(buf, len, utf32_output); + lsx_convert_utf16_to_utf32(buf, len, utf32_output); if (ret.first == nullptr) { return 0; } @@ -70633,8 +65827,8 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lasx_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); + lsx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -70661,8 +65855,8 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lasx_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); + lsx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -70689,7 +65883,7 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lasx_convert_utf32_to_latin1(buf, len, latin1_output); + lsx_convert_utf32_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } @@ -70709,7 +65903,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lasx_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + lsx_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -70733,7 +65927,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - lasx_convert_utf32_to_latin1(buf, len, latin1_output); + lsx_convert_utf32_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } @@ -70760,7 +65954,7 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - lasx_convert_utf32_to_utf16(buf, len, utf16_output); + lsx_convert_utf32_to_utf16(buf, len, utf16_output); if (ret.first == nullptr) { return 0; } @@ -70781,7 +65975,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - lasx_convert_utf32_to_utf16(buf, len, utf16_output); + lsx_convert_utf32_to_utf16(buf, len, utf16_output); if (ret.first == nullptr) { return 0; } @@ -70803,8 +65997,8 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lasx_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); + lsx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_utf16::convert_with_errors( @@ -70827,8 +66021,8 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - lasx_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); + lsx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_utf16::convert_with_errors( @@ -70888,23 +66082,7 @@ simdutf_warn_unused size_t implementation::count_utf16be( #if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t implementation::count_utf8(const char *input, size_t length) const noexcept { - size_t pos = 0; - size_t count = 0; - // Performance degradation when memory address is not 32-byte aligned - while ((((uint64_t)input + pos) & 0x1F && pos < length)) { - if (input[pos++] > -65) { - count++; - } - } - __m256i v_bf = __lasx_xvldi(0xBF); // 0b10111111 - for (; pos + 32 <= length; pos += 32) { - __m256i in = __lasx_xvld(reinterpret_cast(input + pos), 0); - __m256i utf8_count = - __lasx_xvpcnt_h(__lasx_xvmskltz_b(__lasx_xvslt_b(v_bf, in))); - count = count + __lasx_xvpickve2gr_wu(utf8_count, 0) + - __lasx_xvpickve2gr_wu(utf8_count, 4); - } - return count + scalar::utf8::count_code_points(input + pos, length - pos); + return utf8::count_code_points(input, length); } #endif // SIMDUTF_FEATURE_UTF8 @@ -70990,12 +66168,12 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf32( #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { - __m128i v_ffff = lsx_splat_u32(0x0000ffff); + const __m128i v_ffff = lsx_splat_u32(0x0000ffff); size_t pos = 0; size_t count = 0; for (; pos + 4 <= length; pos += 4) { __m128i in = __lsx_vld(reinterpret_cast(input + pos), 0); - __m128i surrogate_bytemask = __lsx_vslt_wu(v_ffff, in); + const __m128i surrogate_bytemask = __lsx_vslt_wu(v_ffff, in); size_t surrogate_count = __lsx_vpickve2gr_bu( __lsx_vpcnt_b(__lsx_vmskltz_w(surrogate_bytemask)), 0); count += 4 + surrogate_count; @@ -71161,14 +66339,937 @@ const char16_t *implementation::find(const char16_t *start, const char16_t *end, } #endif // SIMDUTF_FEATURE_BASE64 -} // namespace lasx +} // namespace lsx } // namespace simdutf -/* begin file src/simdutf/lasx/end.h */ +/* begin file src/simdutf/lsx/end.h */ #undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP -/* end file src/simdutf/lasx/end.h */ -/* end file src/lasx/implementation.cpp */ +/* end file src/simdutf/lsx/end.h */ +/* end file src/lsx/implementation.cpp */ +#endif + +/* begin file src/simdutf_c.cpp */ +/* begin file include/simdutf_c.h */ +/*** + * simdutf_c.h.h - C API for simdutf + * This is currently experimental. + * We are committed to keeping the C API, but there might be mistakes in our + * implementation. Please report any issues you find. + */ + +#ifndef SIMDUTF_C_H +#define SIMDUTF_C_H + +#include +#include +#include + +#ifdef __has_include + #if __has_include() + #include + #else // __has_include() + #define char16_t uint16_t + #define char32_t uint32_t + #endif // __has_include() +#else // __has_include() + #define char16_t uint16_t + #define char32_t uint32_t +#endif // __has_include + +#ifdef __cplusplus +extern "C" { #endif +/* C-friendly subset of simdutf errors */ +typedef enum simdutf_error_code { + SIMDUTF_ERROR_SUCCESS = 0, + SIMDUTF_ERROR_HEADER_BITS, + SIMDUTF_ERROR_TOO_SHORT, + SIMDUTF_ERROR_TOO_LONG, + SIMDUTF_ERROR_OVERLONG, + SIMDUTF_ERROR_TOO_LARGE, + SIMDUTF_ERROR_SURROGATE, + SIMDUTF_ERROR_INVALID_BASE64_CHARACTER, + SIMDUTF_ERROR_BASE64_INPUT_REMAINDER, + SIMDUTF_ERROR_BASE64_EXTRA_BITS, + SIMDUTF_ERROR_OUTPUT_BUFFER_TOO_SMALL, + SIMDUTF_ERROR_OTHER +} simdutf_error_code; + +typedef struct simdutf_result { + simdutf_error_code error; + size_t count; /* position of error or number of code units validated */ +} simdutf_result; + +typedef enum simdutf_encoding_type { + SIMDUTF_ENCODING_UNSPECIFIED = 0, + SIMDUTF_ENCODING_UTF8 = 1, + SIMDUTF_ENCODING_UTF16_LE = 2, + SIMDUTF_ENCODING_UTF16_BE = 4, + SIMDUTF_ENCODING_UTF32_LE = 8, + SIMDUTF_ENCODING_UTF32_BE = 16 +} simdutf_encoding_type; + +/* Validate UTF-8: returns true iff input is valid UTF-8 */ +bool simdutf_validate_utf8(const char *buf, size_t len); + +/* Validate UTF-8 with detailed result */ +simdutf_result simdutf_validate_utf8_with_errors(const char *buf, size_t len); + +/* Encoding detection */ +simdutf_encoding_type simdutf_autodetect_encoding(const char *input, + size_t length); +int simdutf_detect_encodings(const char *input, size_t length); + +/* ASCII validation */ +bool simdutf_validate_ascii(const char *buf, size_t len); +simdutf_result simdutf_validate_ascii_with_errors(const char *buf, size_t len); + +/* UTF-16 ASCII checks */ +bool simdutf_validate_utf16_as_ascii(const char16_t *buf, size_t len); +bool simdutf_validate_utf16be_as_ascii(const char16_t *buf, size_t len); +bool simdutf_validate_utf16le_as_ascii(const char16_t *buf, size_t len); + +/* UTF-16/UTF-8/UTF-32 validation (native/endian-specific) */ +bool simdutf_validate_utf16(const char16_t *buf, size_t len); +bool simdutf_validate_utf16le(const char16_t *buf, size_t len); +bool simdutf_validate_utf16be(const char16_t *buf, size_t len); +simdutf_result simdutf_validate_utf16_with_errors(const char16_t *buf, + size_t len); +simdutf_result simdutf_validate_utf16le_with_errors(const char16_t *buf, + size_t len); +simdutf_result simdutf_validate_utf16be_with_errors(const char16_t *buf, + size_t len); + +bool simdutf_validate_utf32(const char32_t *buf, size_t len); +simdutf_result simdutf_validate_utf32_with_errors(const char32_t *buf, + size_t len); + +/* to_well_formed UTF-16 helpers */ +void simdutf_to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output); +void simdutf_to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output); +void simdutf_to_well_formed_utf16(const char16_t *input, size_t len, + char16_t *output); + +/* Counting */ +size_t simdutf_count_utf16(const char16_t *input, size_t length); +size_t simdutf_count_utf16le(const char16_t *input, size_t length); +size_t simdutf_count_utf16be(const char16_t *input, size_t length); +size_t simdutf_count_utf8(const char *input, size_t length); + +/* Length estimators */ +size_t simdutf_utf8_length_from_latin1(const char *input, size_t length); +size_t simdutf_latin1_length_from_utf8(const char *input, size_t length); +size_t simdutf_latin1_length_from_utf16(size_t length); +size_t simdutf_latin1_length_from_utf32(size_t length); +size_t simdutf_utf16_length_from_utf8(const char *input, size_t length); +size_t simdutf_utf32_length_from_utf8(const char *input, size_t length); +size_t simdutf_utf8_length_from_utf16(const char16_t *input, size_t length); +simdutf_result +simdutf_utf8_length_from_utf16_with_replacement(const char16_t *input, + size_t length); +size_t simdutf_utf8_length_from_utf16le(const char16_t *input, size_t length); +size_t simdutf_utf8_length_from_utf16be(const char16_t *input, size_t length); +simdutf_result +simdutf_utf8_length_from_utf16le_with_replacement(const char16_t *input, + size_t length); +simdutf_result +simdutf_utf8_length_from_utf16be_with_replacement(const char16_t *input, + size_t length); + +/* Conversions: latin1 <-> utf8, utf8 <-> utf16/utf32, utf16 <-> utf8, etc. */ +size_t simdutf_convert_latin1_to_utf8(const char *input, size_t length, + char *output); +size_t simdutf_convert_latin1_to_utf8_safe(const char *input, size_t length, + char *output, size_t utf8_len); +size_t simdutf_convert_latin1_to_utf16le(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_latin1_to_utf16be(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_latin1_to_utf32(const char *input, size_t length, + char32_t *output); + +size_t simdutf_convert_utf8_to_latin1(const char *input, size_t length, + char *output); +size_t simdutf_convert_utf8_to_utf16le(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_utf8_to_utf16be(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_utf8_to_utf16(const char *input, size_t length, + char16_t *output); + +size_t simdutf_convert_utf8_to_utf32(const char *input, size_t length, + char32_t *output); +simdutf_result simdutf_convert_utf8_to_latin1_with_errors(const char *input, + size_t length, + char *output); +simdutf_result simdutf_convert_utf8_to_utf16_with_errors(const char *input, + size_t length, + char16_t *output); +simdutf_result simdutf_convert_utf8_to_utf16le_with_errors(const char *input, + size_t length, + char16_t *output); +simdutf_result simdutf_convert_utf8_to_utf16be_with_errors(const char *input, + size_t length, + char16_t *output); +simdutf_result simdutf_convert_utf8_to_utf32_with_errors(const char *input, + size_t length, + char32_t *output); + +/* Conversions assuming valid input */ +size_t simdutf_convert_valid_utf8_to_latin1(const char *input, size_t length, + char *output); +size_t simdutf_convert_valid_utf8_to_utf16le(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_valid_utf8_to_utf16be(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_valid_utf8_to_utf32(const char *input, size_t length, + char32_t *output); + +/* UTF-16 -> UTF-8 and related conversions */ +size_t simdutf_convert_utf16_to_utf8(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_utf16le_to_utf8(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_utf16be_to_utf8(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_utf16_to_utf8_safe(const char16_t *input, size_t length, + char *output, size_t utf8_len); +size_t simdutf_convert_utf16_to_latin1(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_utf16le_to_latin1(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_utf16be_to_latin1(const char16_t *input, size_t length, + char *output); +simdutf_result +simdutf_convert_utf16_to_latin1_with_errors(const char16_t *input, + size_t length, char *output); +simdutf_result +simdutf_convert_utf16le_to_latin1_with_errors(const char16_t *input, + size_t length, char *output); +simdutf_result +simdutf_convert_utf16be_to_latin1_with_errors(const char16_t *input, + size_t length, char *output); + +simdutf_result simdutf_convert_utf16_to_utf8_with_errors(const char16_t *input, + size_t length, + char *output); +simdutf_result +simdutf_convert_utf16le_to_utf8_with_errors(const char16_t *input, + size_t length, char *output); +simdutf_result +simdutf_convert_utf16be_to_utf8_with_errors(const char16_t *input, + size_t length, char *output); + +size_t simdutf_convert_valid_utf16_to_utf8(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_valid_utf16_to_latin1(const char16_t *input, + size_t length, char *output); +size_t simdutf_convert_valid_utf16le_to_latin1(const char16_t *input, + size_t length, char *output); +size_t simdutf_convert_valid_utf16be_to_latin1(const char16_t *input, + size_t length, char *output); + +size_t simdutf_convert_valid_utf16le_to_utf8(const char16_t *input, + size_t length, char *output); +size_t simdutf_convert_valid_utf16be_to_utf8(const char16_t *input, + size_t length, char *output); + +/* UTF-16 <-> UTF-32 conversions */ +size_t simdutf_convert_utf16_to_utf32(const char16_t *input, size_t length, + char32_t *output); +size_t simdutf_convert_utf16le_to_utf32(const char16_t *input, size_t length, + char32_t *output); +size_t simdutf_convert_utf16be_to_utf32(const char16_t *input, size_t length, + char32_t *output); +simdutf_result simdutf_convert_utf16_to_utf32_with_errors(const char16_t *input, + size_t length, + char32_t *output); +simdutf_result +simdutf_convert_utf16le_to_utf32_with_errors(const char16_t *input, + size_t length, char32_t *output); +simdutf_result +simdutf_convert_utf16be_to_utf32_with_errors(const char16_t *input, + size_t length, char32_t *output); + +/* Valid UTF-16 conversions */ +size_t simdutf_convert_valid_utf16_to_utf32(const char16_t *input, + size_t length, char32_t *output); +size_t simdutf_convert_valid_utf16le_to_utf32(const char16_t *input, + size_t length, char32_t *output); +size_t simdutf_convert_valid_utf16be_to_utf32(const char16_t *input, + size_t length, char32_t *output); + +/* UTF-32 -> ... conversions */ +size_t simdutf_convert_utf32_to_utf8(const char32_t *input, size_t length, + char *output); +simdutf_result simdutf_convert_utf32_to_utf8_with_errors(const char32_t *input, + size_t length, + char *output); +size_t simdutf_convert_valid_utf32_to_utf8(const char32_t *input, size_t length, + char *output); + +size_t simdutf_convert_utf32_to_utf16(const char32_t *input, size_t length, + char16_t *output); +size_t simdutf_convert_utf32_to_utf16le(const char32_t *input, size_t length, + char16_t *output); +size_t simdutf_convert_utf32_to_utf16be(const char32_t *input, size_t length, + char16_t *output); +simdutf_result +simdutf_convert_utf32_to_latin1_with_errors(const char32_t *input, + size_t length, char *output); + +/* --- Find helpers --- */ +const char *simdutf_find(const char *start, const char *end, char character); +const char16_t *simdutf_find_utf16(const char16_t *start, const char16_t *end, + char16_t character); + +/* --- Base64 enums and helpers --- */ +typedef enum simdutf_base64_options { + SIMDUTF_BASE64_DEFAULT = 0, + SIMDUTF_BASE64_URL = 1, + SIMDUTF_BASE64_DEFAULT_NO_PADDING = 2, + SIMDUTF_BASE64_URL_WITH_PADDING = 3, + SIMDUTF_BASE64_DEFAULT_ACCEPT_GARBAGE = 4, + SIMDUTF_BASE64_URL_ACCEPT_GARBAGE = 5, + SIMDUTF_BASE64_DEFAULT_OR_URL = 8, + SIMDUTF_BASE64_DEFAULT_OR_URL_ACCEPT_GARBAGE = 12 +} simdutf_base64_options; + +typedef enum simdutf_last_chunk_handling_options { + SIMDUTF_LAST_CHUNK_LOOSE = 0, + SIMDUTF_LAST_CHUNK_STRICT = 1, + SIMDUTF_LAST_CHUNK_STOP_BEFORE_PARTIAL = 2, + SIMDUTF_LAST_CHUNK_ONLY_FULL_CHUNKS = 3 +} simdutf_last_chunk_handling_options; + +/* maximal binary length estimators */ +size_t simdutf_maximal_binary_length_from_base64(const char *input, + size_t length); +size_t simdutf_maximal_binary_length_from_base64_utf16(const char16_t *input, + size_t length); + +/* base64 decoding/encoding */ +simdutf_result simdutf_base64_to_binary( + const char *input, size_t length, char *output, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options); +simdutf_result simdutf_base64_to_binary_utf16( + const char16_t *input, size_t length, char *output, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options); + +size_t simdutf_base64_length_from_binary(size_t length, + simdutf_base64_options options); +size_t simdutf_base64_length_from_binary_with_lines( + size_t length, simdutf_base64_options options, size_t line_length); + +size_t simdutf_binary_to_base64(const char *input, size_t length, char *output, + simdutf_base64_options options); +size_t simdutf_binary_to_base64_with_lines(const char *input, size_t length, + char *output, size_t line_length, + simdutf_base64_options options); + +/* safe decoding that provides an in/out outlen parameter */ +simdutf_result simdutf_base64_to_binary_safe( + const char *input, size_t length, char *output, size_t *outlen, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options, + bool decode_up_to_bad_char); +simdutf_result simdutf_base64_to_binary_safe_utf16( + const char16_t *input, size_t length, char *output, size_t *outlen, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options, + bool decode_up_to_bad_char); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SIMDUTF_C_H */ +/* end file include/simdutf_c.h */ + +static simdutf_result to_c_result(const simdutf::result &r) { + simdutf_result out; + out.error = static_cast(r.error); + out.count = r.count; + return out; +} + +/* The C wrapper depends on the library features. Only expose the C API + when all relevant feature is enabled. This helps the + single-header generator to omit the C wrapper when features are + disabled. */ +// clang-format off +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_ASCII && SIMDUTF_FEATURE_BASE64 && SIMDUTF_FEATURE_DETECT_ENCODING +// clang-format on +extern "C" { + +bool simdutf_validate_utf8(const char *buf, size_t len) { + return simdutf::validate_utf8(buf, len); +} + +simdutf_result simdutf_validate_utf8_with_errors(const char *buf, size_t len) { + return to_c_result(simdutf::validate_utf8_with_errors(buf, len)); +} + +simdutf_encoding_type simdutf_autodetect_encoding(const char *input, + size_t length) { + return static_cast( + simdutf::autodetect_encoding(input, length)); +} + +int simdutf_detect_encodings(const char *input, size_t length) { + return simdutf::detect_encodings(input, length); +} + +bool simdutf_validate_ascii(const char *buf, size_t len) { + return simdutf::validate_ascii(buf, len); +} +simdutf_result simdutf_validate_ascii_with_errors(const char *buf, size_t len) { + return to_c_result(simdutf::validate_ascii_with_errors(buf, len)); +} + +bool simdutf_validate_utf16_as_ascii(const char16_t *buf, size_t len) { + return simdutf::validate_utf16_as_ascii( + reinterpret_cast(buf), len); +} +bool simdutf_validate_utf16be_as_ascii(const char16_t *buf, size_t len) { + return simdutf::validate_utf16be_as_ascii( + reinterpret_cast(buf), len); +} +bool simdutf_validate_utf16le_as_ascii(const char16_t *buf, size_t len) { + return simdutf::validate_utf16le_as_ascii( + reinterpret_cast(buf), len); +} + +bool simdutf_validate_utf16(const char16_t *buf, size_t len) { + return simdutf::validate_utf16(reinterpret_cast(buf), len); +} +bool simdutf_validate_utf16le(const char16_t *buf, size_t len) { + return simdutf::validate_utf16le(reinterpret_cast(buf), + len); +} +bool simdutf_validate_utf16be(const char16_t *buf, size_t len) { + return simdutf::validate_utf16be(reinterpret_cast(buf), + len); +} +simdutf_result simdutf_validate_utf16_with_errors(const char16_t *buf, + size_t len) { + return to_c_result(simdutf::validate_utf16_with_errors( + reinterpret_cast(buf), len)); +} +simdutf_result simdutf_validate_utf16le_with_errors(const char16_t *buf, + size_t len) { + return to_c_result(simdutf::validate_utf16le_with_errors( + reinterpret_cast(buf), len)); +} +simdutf_result simdutf_validate_utf16be_with_errors(const char16_t *buf, + size_t len) { + return to_c_result(simdutf::validate_utf16be_with_errors( + reinterpret_cast(buf), len)); +} + +bool simdutf_validate_utf32(const char32_t *buf, size_t len) { + return simdutf::validate_utf32(reinterpret_cast(buf), len); +} +simdutf_result simdutf_validate_utf32_with_errors(const char32_t *buf, + size_t len) { + return to_c_result(simdutf::validate_utf32_with_errors( + reinterpret_cast(buf), len)); +} + +void simdutf_to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) { + simdutf::to_well_formed_utf16le(reinterpret_cast(input), + len, reinterpret_cast(output)); +} +void simdutf_to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) { + simdutf::to_well_formed_utf16be(reinterpret_cast(input), + len, reinterpret_cast(output)); +} +void simdutf_to_well_formed_utf16(const char16_t *input, size_t len, + char16_t *output) { + simdutf::to_well_formed_utf16(reinterpret_cast(input), len, + reinterpret_cast(output)); +} + +size_t simdutf_count_utf16(const char16_t *input, size_t length) { + return simdutf::count_utf16(reinterpret_cast(input), + length); +} +size_t simdutf_count_utf16le(const char16_t *input, size_t length) { + return simdutf::count_utf16le(reinterpret_cast(input), + length); +} +size_t simdutf_count_utf16be(const char16_t *input, size_t length) { + return simdutf::count_utf16be(reinterpret_cast(input), + length); +} +size_t simdutf_count_utf8(const char *input, size_t length) { + return simdutf::count_utf8(input, length); +} + +size_t simdutf_utf8_length_from_latin1(const char *input, size_t length) { + return simdutf::utf8_length_from_latin1(input, length); +} +size_t simdutf_latin1_length_from_utf8(const char *input, size_t length) { + return simdutf::latin1_length_from_utf8(input, length); +} +size_t simdutf_latin1_length_from_utf16(size_t length) { + return simdutf::latin1_length_from_utf16(length); +} +size_t simdutf_latin1_length_from_utf32(size_t length) { + return simdutf::latin1_length_from_utf32(length); +} +size_t simdutf_utf16_length_from_utf8(const char *input, size_t length) { + return simdutf::utf16_length_from_utf8(input, length); +} +size_t simdutf_utf32_length_from_utf8(const char *input, size_t length) { + return simdutf::utf32_length_from_utf8(input, length); +} +size_t simdutf_utf8_length_from_utf16(const char16_t *input, size_t length) { + return simdutf::utf8_length_from_utf16( + reinterpret_cast(input), length); +} +simdutf_result +simdutf_utf8_length_from_utf16_with_replacement(const char16_t *input, + size_t length) { + return to_c_result(simdutf::utf8_length_from_utf16_with_replacement( + reinterpret_cast(input), length)); +} +size_t simdutf_utf8_length_from_utf16le(const char16_t *input, size_t length) { + return simdutf::utf8_length_from_utf16le( + reinterpret_cast(input), length); +} +size_t simdutf_utf8_length_from_utf16be(const char16_t *input, size_t length) { + return simdutf::utf8_length_from_utf16be( + reinterpret_cast(input), length); +} +simdutf_result +simdutf_utf8_length_from_utf16le_with_replacement(const char16_t *input, + size_t length) { + return to_c_result(simdutf::utf8_length_from_utf16le_with_replacement( + reinterpret_cast(input), length)); +} +simdutf_result +simdutf_utf8_length_from_utf16be_with_replacement(const char16_t *input, + size_t length) { + return to_c_result(simdutf::utf8_length_from_utf16be_with_replacement( + reinterpret_cast(input), length)); +} + +/* Conversions: latin1 <-> utf8, utf8 <-> utf16/utf32, utf16 <-> utf8, etc. */ +size_t simdutf_convert_latin1_to_utf8(const char *input, size_t length, + char *output) { + return simdutf::convert_latin1_to_utf8(input, length, output); +} + +size_t simdutf_convert_latin1_to_utf8_safe(const char *input, size_t length, + char *output, size_t utf8_len) { + return simdutf::convert_latin1_to_utf8_safe(input, length, output, utf8_len); +} +size_t simdutf_convert_latin1_to_utf16le(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_latin1_to_utf16le( + input, length, reinterpret_cast(output)); +} +size_t simdutf_convert_latin1_to_utf16be(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_latin1_to_utf16be( + input, length, reinterpret_cast(output)); +} +size_t simdutf_convert_latin1_to_utf32(const char *input, size_t length, + char32_t *output) { + return simdutf::convert_latin1_to_utf32(input, length, + reinterpret_cast(output)); +} + +size_t simdutf_convert_utf8_to_latin1(const char *input, size_t length, + char *output) { + return simdutf::convert_utf8_to_latin1(input, length, output); +} +size_t simdutf_convert_utf8_to_utf16le(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_utf8_to_utf16le(input, length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf8_to_utf16(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_utf8_to_utf16(input, length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf8_to_utf16be(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_utf8_to_utf16be(input, length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf8_to_utf32(const char *input, size_t length, + char32_t *output) { + return simdutf::convert_utf8_to_utf32(input, length, + reinterpret_cast(output)); +} +simdutf_result simdutf_convert_utf8_to_latin1_with_errors(const char *input, + size_t length, + char *output) { + return to_c_result( + simdutf::convert_utf8_to_latin1_with_errors(input, length, output)); +} +simdutf_result simdutf_convert_utf8_to_utf16_with_errors(const char *input, + size_t length, + char16_t *output) { + return to_c_result(simdutf::convert_utf8_to_utf16_with_errors( + input, length, reinterpret_cast(output))); +} +simdutf_result simdutf_convert_utf8_to_utf16le_with_errors(const char *input, + size_t length, + char16_t *output) { + return to_c_result(simdutf::convert_utf8_to_utf16le_with_errors( + input, length, reinterpret_cast(output))); +} +simdutf_result simdutf_convert_utf8_to_utf16be_with_errors(const char *input, + size_t length, + char16_t *output) { + return to_c_result(simdutf::convert_utf8_to_utf16be_with_errors( + input, length, reinterpret_cast(output))); +} +simdutf_result simdutf_convert_utf8_to_utf32_with_errors(const char *input, + size_t length, + char32_t *output) { + return to_c_result(simdutf::convert_utf8_to_utf32_with_errors( + input, length, reinterpret_cast(output))); +} + +/* Conversions assuming valid input */ +size_t simdutf_convert_valid_utf8_to_latin1(const char *input, size_t length, + char *output) { + return simdutf::convert_valid_utf8_to_latin1(input, length, output); +} +size_t simdutf_convert_valid_utf8_to_utf16le(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_valid_utf8_to_utf16le( + input, length, reinterpret_cast(output)); +} +size_t simdutf_convert_valid_utf8_to_utf16be(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_valid_utf8_to_utf16be( + input, length, reinterpret_cast(output)); +} +size_t simdutf_convert_valid_utf8_to_utf32(const char *input, size_t length, + char32_t *output) { + return simdutf::convert_valid_utf8_to_utf32( + input, length, reinterpret_cast(output)); +} + +/* UTF-16 -> UTF-8 and related conversions */ +size_t simdutf_convert_utf16_to_utf8(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16_to_utf8( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_utf16_to_utf8_safe(const char16_t *input, size_t length, + char *output, size_t utf8_len) { + return simdutf::convert_utf16_to_utf8_safe( + reinterpret_cast(input), length, output, utf8_len); +} +size_t simdutf_convert_utf16_to_latin1(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16_to_latin1( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_utf16le_to_latin1(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16le_to_latin1( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_utf16be_to_latin1(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16be_to_latin1( + reinterpret_cast(input), length, output); +} +simdutf_result +simdutf_convert_utf16_to_latin1_with_errors(const char16_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf16_to_latin1_with_errors( + reinterpret_cast(input), length, output)); +} +simdutf_result +simdutf_convert_utf16le_to_latin1_with_errors(const char16_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf16le_to_latin1_with_errors( + reinterpret_cast(input), length, output)); +} +simdutf_result +simdutf_convert_utf16be_to_latin1_with_errors(const char16_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf16be_to_latin1_with_errors( + reinterpret_cast(input), length, output)); +} + +simdutf_result simdutf_convert_utf16_to_utf8_with_errors(const char16_t *input, + size_t length, + char *output) { + return to_c_result(simdutf::convert_utf16_to_utf8_with_errors( + reinterpret_cast(input), length, output)); +} +simdutf_result +simdutf_convert_utf16le_to_utf8_with_errors(const char16_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf16le_to_utf8_with_errors( + reinterpret_cast(input), length, output)); +} +simdutf_result +simdutf_convert_utf16be_to_utf8_with_errors(const char16_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf16be_to_utf8_with_errors( + reinterpret_cast(input), length, output)); +} + +size_t simdutf_convert_utf16le_to_utf8(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16le_to_utf8( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_utf16be_to_utf8(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16be_to_utf8( + reinterpret_cast(input), length, output); +} + +size_t simdutf_convert_valid_utf16_to_utf8(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_valid_utf16_to_utf8( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_valid_utf16_to_latin1(const char16_t *input, + size_t length, char *output) { + return simdutf::convert_valid_utf16_to_latin1( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_valid_utf16le_to_latin1(const char16_t *input, + size_t length, char *output) { + return simdutf::convert_valid_utf16le_to_latin1( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_valid_utf16be_to_latin1(const char16_t *input, + size_t length, char *output) { + return simdutf::convert_valid_utf16be_to_latin1( + reinterpret_cast(input), length, output); +} + +size_t simdutf_convert_valid_utf16le_to_utf8(const char16_t *input, + size_t length, char *output) { + return simdutf::convert_valid_utf16le_to_utf8( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_valid_utf16be_to_utf8(const char16_t *input, + size_t length, char *output) { + return simdutf::convert_valid_utf16be_to_utf8( + reinterpret_cast(input), length, output); +} + +/* UTF-16 <-> UTF-32 conversions */ +size_t simdutf_convert_utf16_to_utf32(const char16_t *input, size_t length, + char32_t *output) { + return simdutf::convert_utf16_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf16le_to_utf32(const char16_t *input, size_t length, + char32_t *output) { + return simdutf::convert_utf16le_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf16be_to_utf32(const char16_t *input, size_t length, + char32_t *output) { + return simdutf::convert_utf16be_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +simdutf_result simdutf_convert_utf16_to_utf32_with_errors(const char16_t *input, + size_t length, + char32_t *output) { + return to_c_result(simdutf::convert_utf16_to_utf32_with_errors( + reinterpret_cast(input), length, + reinterpret_cast(output))); +} +simdutf_result +simdutf_convert_utf16le_to_utf32_with_errors(const char16_t *input, + size_t length, char32_t *output) { + return to_c_result(simdutf::convert_utf16le_to_utf32_with_errors( + reinterpret_cast(input), length, + reinterpret_cast(output))); +} +simdutf_result +simdutf_convert_utf16be_to_utf32_with_errors(const char16_t *input, + size_t length, char32_t *output) { + return to_c_result(simdutf::convert_utf16be_to_utf32_with_errors( + reinterpret_cast(input), length, + reinterpret_cast(output))); +} + +/* Valid UTF-16 conversions */ +size_t simdutf_convert_valid_utf16_to_utf32(const char16_t *input, + size_t length, char32_t *output) { + return simdutf::convert_valid_utf16_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_valid_utf16le_to_utf32(const char16_t *input, + size_t length, char32_t *output) { + return simdutf::convert_valid_utf16le_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_valid_utf16be_to_utf32(const char16_t *input, + size_t length, char32_t *output) { + return simdutf::convert_valid_utf16be_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} + +/* UTF-32 -> ... conversions */ +size_t simdutf_convert_utf32_to_utf8(const char32_t *input, size_t length, + char *output) { + return simdutf::convert_utf32_to_utf8( + reinterpret_cast(input), length, output); +} +simdutf_result simdutf_convert_utf32_to_utf8_with_errors(const char32_t *input, + size_t length, + char *output) { + return to_c_result(simdutf::convert_utf32_to_utf8_with_errors( + reinterpret_cast(input), length, output)); +} +size_t simdutf_convert_valid_utf32_to_utf8(const char32_t *input, size_t length, + char *output) { + return simdutf::convert_valid_utf32_to_utf8( + reinterpret_cast(input), length, output); +} + +size_t simdutf_convert_utf32_to_utf16(const char32_t *input, size_t length, + char16_t *output) { + return simdutf::convert_utf32_to_utf16( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf32_to_utf16le(const char32_t *input, size_t length, + char16_t *output) { + return simdutf::convert_utf32_to_utf16le( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf32_to_utf16be(const char32_t *input, size_t length, + char16_t *output) { + return simdutf::convert_utf32_to_utf16be( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +simdutf_result +simdutf_convert_utf32_to_latin1_with_errors(const char32_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf32_to_latin1_with_errors( + reinterpret_cast(input), length, output)); +} + +/* --- find helpers --- */ +const char *simdutf_find(const char *start, const char *end, char character) { + return simdutf::find(start, end, character); +} +const char16_t *simdutf_find_utf16(const char16_t *start, const char16_t *end, + char16_t character) { + return simdutf::find(start, end, character); +} + +/* --- base64 helpers --- */ +size_t simdutf_maximal_binary_length_from_base64(const char *input, + size_t length) { + return simdutf::maximal_binary_length_from_base64(input, length); +} +size_t simdutf_maximal_binary_length_from_base64_utf16(const char16_t *input, + size_t length) { + return simdutf::maximal_binary_length_from_base64(input, length); +} + +simdutf_result simdutf_base64_to_binary( + const char *input, size_t length, char *output, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options) { + return to_c_result(simdutf::base64_to_binary( + input, length, output, static_cast(options), + static_cast(last_chunk_options))); +} +simdutf_result simdutf_base64_to_binary_utf16( + const char16_t *input, size_t length, char *output, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options) { + return to_c_result(simdutf::base64_to_binary( + input, length, output, static_cast(options), + static_cast(last_chunk_options))); +} + +size_t simdutf_base64_length_from_binary(size_t length, + simdutf_base64_options options) { + return simdutf::base64_length_from_binary( + length, static_cast(options)); +} +size_t simdutf_base64_length_from_binary_with_lines( + size_t length, simdutf_base64_options options, size_t line_length) { + return simdutf::base64_length_from_binary_with_lines( + length, static_cast(options), line_length); +} + +size_t simdutf_binary_to_base64(const char *input, size_t length, char *output, + simdutf_base64_options options) { + return simdutf::binary_to_base64( + input, length, output, static_cast(options)); +} +size_t simdutf_binary_to_base64_with_lines(const char *input, size_t length, + char *output, size_t line_length, + simdutf_base64_options options) { + return simdutf::binary_to_base64_with_lines( + input, length, output, line_length, + static_cast(options)); +} + +simdutf_result simdutf_base64_to_binary_safe( + const char *input, size_t length, char *output, size_t *outlen, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options, + bool decode_up_to_bad_char) { + size_t local_out = outlen ? *outlen : 0; + simdutf::result r = simdutf::base64_to_binary_safe( + input, length, output, local_out, + static_cast(options), + static_cast(last_chunk_options), + decode_up_to_bad_char); + if (outlen) + *outlen = local_out; + return to_c_result(r); +} +simdutf_result simdutf_base64_to_binary_safe_utf16( + const char16_t *input, size_t length, char *output, size_t *outlen, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options, + bool decode_up_to_bad_char) { + size_t local_out = outlen ? *outlen : 0; + simdutf::result r = simdutf::base64_to_binary_safe( + input, length, output, local_out, + static_cast(options), + static_cast(last_chunk_options), + decode_up_to_bad_char); + if (outlen) + *outlen = local_out; + return to_c_result(r); +} + +} // extern "C" +// clang-format off +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_ASCII && SIMDUTF_FEATURE_BASE64 && SIMDUTF_FEATURE_DETECT_ENCODING +// clang-format on +/* end file src/simdutf_c.cpp */ SIMDUTF_POP_DISABLE_WARNINGS /* end file src/simdutf.cpp */ diff --git a/simdutf.h b/simdutf.h index e5f4872..0c93439 100644 --- a/simdutf.h +++ b/simdutf.h @@ -1,6 +1,6 @@ //go:build !libsimdutf -/* auto-generated on 2025-12-20 11:48:09 -0500. Do not edit! */ +/* auto-generated on 2026-01-13 09:03:21 +0100. Do not edit! */ /* begin file include/simdutf.h */ #ifndef SIMDUTF_H #define SIMDUTF_H @@ -22,6 +22,11 @@ #endif #endif +// C++ 26 +#if !defined(SIMDUTF_CPLUSPLUS26) && (SIMDUTF_CPLUSPLUS >= 202602L) + #define SIMDUTF_CPLUSPLUS26 1 +#endif + // C++ 23 #if !defined(SIMDUTF_CPLUSPLUS23) && (SIMDUTF_CPLUSPLUS >= 202302L) #define SIMDUTF_CPLUSPLUS23 1 @@ -202,7 +207,7 @@ #elif defined(__loongarch_lp64) #if defined(__loongarch_sx) && defined(__loongarch_asx) #define SIMDUTF_IS_LSX 1 - #define SIMDUTF_IS_LASX 1 + #define SIMDUTF_IS_LASX 1 // We can always run both #elif defined(__loongarch_sx) #define SIMDUTF_IS_LSX 1 #endif @@ -252,7 +257,7 @@ // // We are going to use runtime dispatch. -#ifdef SIMDUTF_IS_X86_64 +#if defined(SIMDUTF_IS_X86_64) || defined(SIMDUTF_IS_LSX) #ifdef __clang__ // clang does not have GCC push pop // warning: clang attribute push can't be used within a namespace in clang @@ -269,7 +274,7 @@ #define SIMDUTF_UNTARGET_REGION _Pragma("GCC pop_options") #endif // clang then gcc -#endif // x86 +#endif // defined(SIMDUTF_IS_X86_64) || defined(SIMDUTF_IS_LSX) // Default target region macros don't do anything. #ifndef SIMDUTF_TARGET_REGION @@ -535,6 +540,14 @@ #define simdutf_constexpr #endif +// Will evaluate to constexpr in C++23 or later. This makes it possible to mark +// functions constexpr if the "if consteval" feature is available to use. +#if SIMDUTF_CPLUSPLUS23 + #define simdutf_constexpr23 constexpr +#else + #define simdutf_constexpr23 +#endif + #ifndef SIMDUTF_DLLIMPORTEXPORT #if defined(SIMDUTF_VISUAL_STUDIO) // Visual Studio /** @@ -587,6 +600,12 @@ #define SIMDUTF_ENCODING_TYPES_H #include +#if !defined(SIMDUTF_NO_STD_TEXT_ENCODING) && \ + defined(__cpp_lib_text_encoding) && __cpp_lib_text_encoding >= 202306L + #define SIMDUTF_HAS_STD_TEXT_ENCODING 1 + #include +#endif + namespace simdutf { enum encoding_type { @@ -600,20 +619,27 @@ enum encoding_type { unspecified = 0 }; -enum endianness { LITTLE = 0, BIG = 1 }; - -constexpr bool match_system(endianness e) { #ifndef SIMDUTF_IS_BIG_ENDIAN #error "SIMDUTF_IS_BIG_ENDIAN needs to be defined." #endif + +enum endianness { + LITTLE = 0, + BIG = 1, + NATIVE = #if SIMDUTF_IS_BIG_ENDIAN - return e == endianness::BIG; + BIG #else - return e == endianness::LITTLE; + LITTLE #endif +}; + +simdutf_warn_unused simdutf_really_inline constexpr bool +match_system(endianness e) { + return e == endianness::NATIVE; } -std::string to_string(encoding_type bom); +simdutf_warn_unused std::string to_string(encoding_type bom); // Note that BOM for UTF8 is discouraged. namespace BOM { @@ -625,17 +651,136 @@ namespace BOM { * @return the corresponding encoding */ -encoding_type check_bom(const uint8_t *byte, size_t length); -encoding_type check_bom(const char *byte, size_t length); +simdutf_warn_unused encoding_type check_bom(const uint8_t *byte, size_t length); +simdutf_warn_unused encoding_type check_bom(const char *byte, size_t length); /** * Returns the size, in bytes, of the BOM for a given encoding type. * Note that UTF8 BOM are discouraged. * @param bom the encoding type * @return the size in bytes of the corresponding BOM */ -size_t bom_byte_size(encoding_type bom); +simdutf_warn_unused size_t bom_byte_size(encoding_type bom); } // namespace BOM + +#ifdef SIMDUTF_HAS_STD_TEXT_ENCODING +/** + * Convert a simdutf encoding type to a std::text_encoding. + * + * @param enc the simdutf encoding type + * @return the corresponding std::text_encoding, or + * std::text_encoding::id::unknown for unspecified/unsupported + */ +simdutf_warn_unused constexpr std::text_encoding +to_std_encoding(encoding_type enc) noexcept { + switch (enc) { + case UTF8: + return std::text_encoding(std::text_encoding::id::UTF8); + case UTF16_LE: + return std::text_encoding(std::text_encoding::id::UTF16LE); + case UTF16_BE: + return std::text_encoding(std::text_encoding::id::UTF16BE); + case UTF32_LE: + return std::text_encoding(std::text_encoding::id::UTF32LE); + case UTF32_BE: + return std::text_encoding(std::text_encoding::id::UTF32BE); + case Latin1: + return std::text_encoding(std::text_encoding::id::ISOLatin1); + case unspecified: + default: + return std::text_encoding(std::text_encoding::id::unknown); + } +} + +/** + * Convert a std::text_encoding to a simdutf encoding type. + * + * @param enc the std::text_encoding + * @return the corresponding simdutf encoding type, or + * encoding_type::unspecified if the encoding is not supported + */ +simdutf_warn_unused constexpr encoding_type +from_std_encoding(const std::text_encoding &enc) noexcept { + switch (enc.mib()) { + case std::text_encoding::id::UTF8: + return UTF8; + case std::text_encoding::id::UTF16LE: + return UTF16_LE; + case std::text_encoding::id::UTF16BE: + return UTF16_BE; + case std::text_encoding::id::UTF32LE: + return UTF32_LE; + case std::text_encoding::id::UTF32BE: + return UTF32_BE; + case std::text_encoding::id::ISOLatin1: + return Latin1; + default: + return unspecified; + } +} + +/** + * Get the native-endian UTF-16 encoding type for this system. + * + * @return UTF16_LE on little-endian systems, UTF16_BE on big-endian systems + */ +simdutf_warn_unused constexpr encoding_type native_utf16_encoding() noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return UTF16_BE; + #else + return UTF16_LE; + #endif +} + +/** + * Get the native-endian UTF-32 encoding type for this system. + * + * @return UTF32_LE on little-endian systems, UTF32_BE on big-endian systems + */ +simdutf_warn_unused constexpr encoding_type native_utf32_encoding() noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return UTF32_BE; + #else + return UTF32_LE; + #endif +} + +/** + * Convert a std::text_encoding to a simdutf encoding type, + * using native endianness for UTF-16/UTF-32 without explicit endianness. + * + * When the input is std::text_encoding::id::UTF16 or UTF32 (without LE/BE + * suffix), this returns the native-endian simdutf variant. + * + * @param enc the std::text_encoding + * @return the corresponding simdutf encoding type, or + * encoding_type::unspecified if the encoding is not supported + */ +simdutf_warn_unused constexpr encoding_type +from_std_encoding_native(const std::text_encoding &enc) noexcept { + switch (enc.mib()) { + case std::text_encoding::id::UTF8: + return UTF8; + case std::text_encoding::id::UTF16: + return native_utf16_encoding(); + case std::text_encoding::id::UTF16LE: + return UTF16_LE; + case std::text_encoding::id::UTF16BE: + return UTF16_BE; + case std::text_encoding::id::UTF32: + return native_utf32_encoding(); + case std::text_encoding::id::UTF32LE: + return UTF32_LE; + case std::text_encoding::id::UTF32BE: + return UTF32_BE; + case std::text_encoding::id::ISOLatin1: + return Latin1; + default: + return unspecified; + } +} +#endif // SIMDUTF_HAS_STD_TEXT_ENCODING + } // namespace simdutf #endif /* end file include/simdutf/encoding_types.h */ @@ -720,17 +865,18 @@ struct result { // case of success, indicates the number of code units // validated/written. - simdutf_really_inline result() noexcept + simdutf_really_inline simdutf_constexpr23 result() noexcept : error{error_code::SUCCESS}, count{0} {} - simdutf_really_inline result(error_code err, size_t pos) noexcept + simdutf_really_inline simdutf_constexpr23 result(error_code err, + size_t pos) noexcept : error{err}, count{pos} {} - simdutf_really_inline bool is_ok() const noexcept { + simdutf_really_inline simdutf_constexpr23 bool is_ok() const noexcept { return error == error_code::SUCCESS; } - simdutf_really_inline bool is_err() const noexcept { + simdutf_really_inline simdutf_constexpr23 bool is_err() const noexcept { return error != error_code::SUCCESS; } }; @@ -742,18 +888,19 @@ struct full_result { bool padding_error = false; // true if the error is due to padding, only // meaningful when error is not SUCCESS - simdutf_really_inline full_result() noexcept + simdutf_really_inline simdutf_constexpr23 full_result() noexcept : error{error_code::SUCCESS}, input_count{0}, output_count{0} {} - simdutf_really_inline full_result(error_code err, size_t pos_in, - size_t pos_out) noexcept + simdutf_really_inline simdutf_constexpr23 full_result(error_code err, + size_t pos_in, + size_t pos_out) noexcept : error{err}, input_count{pos_in}, output_count{pos_out} {} - simdutf_really_inline full_result(error_code err, size_t pos_in, - size_t pos_out, bool padding_err) noexcept + simdutf_really_inline simdutf_constexpr23 full_result( + error_code err, size_t pos_in, size_t pos_out, bool padding_err) noexcept : error{err}, input_count{pos_in}, output_count{pos_out}, padding_error{padding_err} {} - simdutf_really_inline operator result() const noexcept { + simdutf_really_inline simdutf_constexpr23 operator result() const noexcept { if (error == error_code::SUCCESS) { return result{error, output_count}; } else { @@ -777,22 +924,22 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "7.7.1" +#define SIMDUTF_VERSION "8.0.0" namespace simdutf { enum { /** * The major version (MAJOR.minor.revision) of simdutf being used. */ - SIMDUTF_VERSION_MAJOR = 7, + SIMDUTF_VERSION_MAJOR = 8, /** * The minor version (major.MINOR.revision) of simdutf being used. */ - SIMDUTF_VERSION_MINOR = 7, + SIMDUTF_VERSION_MINOR = 0, /** * The revision (major.minor.REVISION) of simdutf being used. */ - SIMDUTF_VERSION_REVISION = 1 + SIMDUTF_VERSION_REVISION = 0 }; } // namespace simdutf @@ -1161,13 +1308,152 @@ static inline uint32_t detect_supported_architectures() { #define SIMDUTF_FEATURE_UTF32 1 #define SIMDUTF_FEATURE_BASE64 1 +#if SIMDUTF_CPLUSPLUS23 +/* begin file include/simdutf/constexpr_ptr.h */ +#ifndef SIMDUTF_CONSTEXPR_PTR_H +#define SIMDUTF_CONSTEXPR_PTR_H + +#include + namespace simdutf { +namespace detail { +/** + * The constexpr_ptr class is a workaround for reinterpret_cast not being + * allowed during constant evaluation. + */ +template + requires(sizeof(to) == sizeof(from)) +struct constexpr_ptr { + const from *p; -constexpr size_t default_line_length = - 76; ///< default line length for base64 encoding with lines + constexpr explicit constexpr_ptr(const from *ptr) noexcept : p(ptr) {} + + constexpr to operator*() const noexcept { return static_cast(*p); } + + constexpr constexpr_ptr &operator++() noexcept { + ++p; + return *this; + } + + constexpr constexpr_ptr operator++(int) noexcept { + auto old = *this; + ++p; + return old; + } + + constexpr constexpr_ptr &operator--() noexcept { + --p; + return *this; + } + + constexpr constexpr_ptr operator--(int) noexcept { + auto old = *this; + --p; + return old; + } + + constexpr constexpr_ptr &operator+=(std::ptrdiff_t n) noexcept { + p += n; + return *this; + } + + constexpr constexpr_ptr &operator-=(std::ptrdiff_t n) noexcept { + p -= n; + return *this; + } + + constexpr constexpr_ptr operator+(std::ptrdiff_t n) const noexcept { + return constexpr_ptr{p + n}; + } + + constexpr constexpr_ptr operator-(std::ptrdiff_t n) const noexcept { + return constexpr_ptr{p - n}; + } + + constexpr std::ptrdiff_t operator-(const constexpr_ptr &o) const noexcept { + return p - o.p; + } + + constexpr to operator[](std::ptrdiff_t n) const noexcept { + return static_cast(*(p + n)); + } + + // to prevent compilation errors for memcpy, even if it is never + // called during constant evaluation + constexpr operator const void *() const noexcept { return p; } +}; + +template +constexpr constexpr_ptr constexpr_cast_ptr(from *p) noexcept { + return constexpr_ptr{p}; +} + +/** + * helper type for constexpr_writeptr, so it is possible to + * do "*ptr = val;" + */ +template +struct constexpr_write_ptr_proxy { + + constexpr explicit constexpr_write_ptr_proxy(TargetType *raw) : p(raw) {} + + constexpr constexpr_write_ptr_proxy &operator=(SrcType v) { + *p = static_cast(v); + return *this; + } + + TargetType *p; +}; + +/** + * helper for working around reinterpret_cast not being allowed during constexpr + * evaluation. will try to act as a SrcType* but actually write to the pointer + * given in the constructor, which is of another type TargetType + */ +template struct constexpr_write_ptr { + constexpr explicit constexpr_write_ptr(TargetType *raw) : p(raw) {} + + constexpr constexpr_write_ptr_proxy operator*() const { + return constexpr_write_ptr_proxy{p}; + } + + constexpr constexpr_write_ptr_proxy + operator[](std::ptrdiff_t n) const { + return constexpr_write_ptr_proxy{p + n}; + } + + constexpr constexpr_write_ptr &operator++() { + ++p; + return *this; + } + + constexpr constexpr_write_ptr operator++(int) { + constexpr_write_ptr old = *this; + ++p; + return old; + } + + constexpr std::ptrdiff_t operator-(const constexpr_write_ptr &other) const { + return p - other.p; + } + + TargetType *p; +}; + +template +constexpr auto constexpr_cast_writeptr(TargetType *raw) { + return constexpr_write_ptr{raw}; +} + +} // namespace detail +} // namespace simdutf +#endif +/* end file include/simdutf/constexpr_ptr.h */ +#endif #if SIMDUTF_SPAN /// helpers placed in namespace detail are not a part of the public API +namespace simdutf { namespace detail { /** * matches a byte, in the many ways C++ allows. note that these @@ -1211,138 +1497,3618 @@ concept output_span_of_byte_like = requires(T &t) { { *t.data() } noexcept -> is_byte_like; { *t.data() } noexcept -> is_mutable; }; -} // namespace detail -#endif -#if SIMDUTF_FEATURE_DETECT_ENCODING /** - * Autodetect the encoding of the input, a single encoding is recommended. - * E.g., the function might return simdutf::encoding_type::UTF8, - * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or - * simdutf::encoding_type::UTF32_LE. - * - * @param input the string to analyze. - * @param length the length of the string in bytes. - * @return the detected encoding type + * a pointer like object, when indexed, results in a byte like result. + * valid examples: char*, const char*, std::array + * invalid examples: int*, std::array */ -simdutf_warn_unused simdutf::encoding_type -autodetect_encoding(const char *input, size_t length) noexcept; -simdutf_really_inline simdutf_warn_unused simdutf::encoding_type -autodetect_encoding(const uint8_t *input, size_t length) noexcept { - return autodetect_encoding(reinterpret_cast(input), length); -} - #if SIMDUTF_SPAN +template +concept indexes_into_byte_like = requires(InputPtr p) { + { std::decay_t{} } -> simdutf::detail::byte_like; +}; +template +concept indexes_into_utf16 = requires(InputPtr p) { + { std::decay_t{} } -> std::same_as; +}; +template +concept indexes_into_utf32 = requires(InputPtr p) { + { std::decay_t{} } -> std::same_as; +}; + +template +concept index_assignable_from_char = requires(InputPtr p, char s) { + { p[0] = s }; +}; + /** - * Autodetect the encoding of the input, a single encoding is recommended. - * E.g., the function might return simdutf::encoding_type::UTF8, - * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or - * simdutf::encoding_type::UTF32_LE. - * - * @param input the string to analyze. can be a anything span-like that has a - * data() and size() that points to character data: std::string, - * std::string_view, std::vector, std::span etc. - * @return the detected encoding type + * a pointer like object that results in a uint32_t when indexed. + * valid examples: uint32_t* */ -simdutf_really_inline simdutf_warn_unused simdutf::encoding_type -autodetect_encoding( - const detail::input_span_of_byte_like auto &input) noexcept { - return autodetect_encoding(reinterpret_cast(input.data()), - input.size()); +template +concept indexes_into_uint32 = requires(InputPtr p) { + { std::decay_t{} } -> std::same_as; +}; +} // namespace detail +} // namespace simdutf +#endif // SIMDUTF_SPAN + +// these includes are needed for constexpr support. they are +// not part of the public api. +/* begin file include/simdutf/scalar/swap_bytes.h */ +#ifndef SIMDUTF_SWAP_BYTES_H +#define SIMDUTF_SWAP_BYTES_H + +namespace simdutf { +namespace scalar { + +constexpr inline simdutf_warn_unused uint16_t +u16_swap_bytes(const uint16_t word) { + return uint16_t((word >> 8) | (word << 8)); } - #endif // SIMDUTF_SPAN -/** - * Autodetect the possible encodings of the input in one pass. - * E.g., if the input might be UTF-16LE or UTF-8, this function returns - * the value (simdutf::encoding_type::UTF8 | simdutf::encoding_type::UTF16_LE). - * - * Overridden by each implementation. - * - * @param input the string to analyze. - * @param length the length of the string in bytes. - * @return the detected encoding type - */ -simdutf_warn_unused int detect_encodings(const char *input, - size_t length) noexcept; -simdutf_really_inline simdutf_warn_unused int -detect_encodings(const uint8_t *input, size_t length) noexcept { - return detect_encodings(reinterpret_cast(input), length); +constexpr inline simdutf_warn_unused uint32_t +u32_swap_bytes(const uint32_t word) { + return ((word >> 24) & 0xff) | // move byte 3 to byte 0 + ((word << 8) & 0xff0000) | // move byte 1 to byte 2 + ((word >> 8) & 0xff00) | // move byte 2 to byte 1 + ((word << 24) & 0xff000000); // byte 0 to byte 3 } - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused int -detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept { - return detect_encodings(reinterpret_cast(input.data()), - input.size()); + +namespace utf32 { +template constexpr uint32_t swap_if_needed(uint32_t c) { + return !match_system(big_endian) ? scalar::u32_swap_bytes(c) : c; } - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_DETECT_ENCODING +} // namespace utf32 -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/** - * Validate the UTF-8 string. This function may be best when you expect - * the input to be almost always valid. Otherwise, consider using - * validate_utf8_with_errors. - * - * Overridden by each implementation. - * - * @param buf the UTF-8 string to validate. - * @param len the length of the string in bytes. - * @return true if and only if the string is valid UTF-8. - */ -simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool -validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept { - return validate_utf8(reinterpret_cast(input.data()), - input.size()); +namespace utf16 { +template constexpr uint16_t swap_if_needed(uint16_t c) { + return !match_system(big_endian) ? scalar::u16_swap_bytes(c) : c; } - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +} // namespace utf16 -#if SIMDUTF_FEATURE_UTF8 -/** - * Validate the UTF-8 string and stop on error. - * - * Overridden by each implementation. - * - * @param buf the UTF-8 string to validate. - * @param len the length of the string in bytes. - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated if - * successful. - */ -simdutf_warn_unused result validate_utf8_with_errors(const char *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result validate_utf8_with_errors( - const detail::input_span_of_byte_like auto &input) noexcept { - return validate_utf8_with_errors(reinterpret_cast(input.data()), - input.size()); +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/swap_bytes.h */ +/* begin file include/simdutf/scalar/ascii.h */ +#ifndef SIMDUTF_ASCII_H +#define SIMDUTF_ASCII_H + +namespace simdutf { +namespace scalar { +namespace { +namespace ascii { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_warn_unused simdutf_constexpr23 bool validate(InputPtr data, + size_t len) noexcept { + uint64_t pos = 0; + +#if SIMDUTF_CPLUSPLUS23 + // avoid memcpy during constant evaluation + if !consteval +#endif + // process in blocks of 16 bytes when possible + { + for (; pos + 16 <= len; pos += 16) { + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) != 0) { + return false; + } + } + } + + // process the tail byte-by-byte + for (; pos < len; pos++) { + if (static_cast(data[pos]) >= 0b10000000) { + return false; + } + } + return true; } - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_warn_unused simdutf_constexpr23 result +validate_with_errors(InputPtr data, size_t len) noexcept { + size_t pos = 0; +#if SIMDUTF_CPLUSPLUS23 + // avoid memcpy during constant evaluation + if !consteval +#endif + { + // process in blocks of 16 bytes when possible + for (; pos + 16 <= len; pos += 16) { + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) != 0) { + for (; pos < len; pos++) { + if (static_cast(data[pos]) >= 0b10000000) { + return result(error_code::TOO_LARGE, pos); + } + } + } + } + } -#if SIMDUTF_FEATURE_ASCII -/** - * Validate the ASCII string. - * - * Overridden by each implementation. - * - * @param buf the ASCII string to validate. - * @param len the length of the string in bytes. - * @return true if and only if the string is valid ASCII. - */ -simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool -validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept { - return validate_ascii(reinterpret_cast(input.data()), - input.size()); + // process the tail byte-by-byte + for (; pos < len; pos++) { + if (static_cast(data[pos]) >= 0b10000000) { + return result(error_code::TOO_LARGE, pos); + } + } + return result(error_code::SUCCESS, pos); } - #endif // SIMDUTF_SPAN -/** +} // namespace ascii +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/ascii.h */ +/* begin file include/simdutf/scalar/atomic_util.h */ +#ifndef SIMDUTF_ATOMIC_UTIL_H +#define SIMDUTF_ATOMIC_UTIL_H +#if SIMDUTF_ATOMIC_REF + #include +namespace simdutf { +namespace scalar { + +// This function is a memcpy that uses atomic operations to read from the +// source. +inline void memcpy_atomic_read(char *dst, const char *src, size_t len) { + static_assert(std::atomic_ref::required_alignment == sizeof(char), + "std::atomic_ref requires the same alignment as char_type"); + // We expect all 64-bit systems to be able to read 64-bit words from an + // aligned memory region atomically. You might be able to do better on + // specific systems, e.g., x64 systems can read 128-bit words atomically. + constexpr size_t alignment = sizeof(uint64_t); + + // Lambda for atomic byte-by-byte copy + auto bbb_memcpy_atomic_read = [](char *bytedst, const char *bytesrc, + size_t bytelen) noexcept { + char *mutable_src = const_cast(bytesrc); + for (size_t j = 0; j < bytelen; ++j) { + bytedst[j] = + std::atomic_ref(mutable_src[j]).load(std::memory_order_relaxed); + } + }; + + // Handle unaligned start + size_t offset = reinterpret_cast(src) % alignment; + if (offset) { + size_t to_align = std::min(len, alignment - offset); + bbb_memcpy_atomic_read(dst, src, to_align); + src += to_align; + dst += to_align; + len -= to_align; + } + + // Process aligned 64-bit chunks + while (len >= alignment) { + auto *src_aligned = reinterpret_cast(const_cast(src)); + const auto dst_value = + std::atomic_ref(*src_aligned).load(std::memory_order_relaxed); + std::memcpy(dst, &dst_value, sizeof(uint64_t)); + src += alignment; + dst += alignment; + len -= alignment; + } + + // Handle remaining bytes + if (len) { + bbb_memcpy_atomic_read(dst, src, len); + } +} + +// This function is a memcpy that uses atomic operations to write to the +// destination. +inline void memcpy_atomic_write(char *dst, const char *src, size_t len) { + static_assert(std::atomic_ref::required_alignment == sizeof(char), + "std::atomic_ref requires the same alignment as char"); + // We expect all 64-bit systems to be able to write 64-bit words to an aligned + // memory region atomically. + // You might be able to do better on specific systems, e.g., x64 systems can + // write 128-bit words atomically. + constexpr size_t alignment = sizeof(uint64_t); + + // Lambda for atomic byte-by-byte write + auto bbb_memcpy_atomic_write = [](char *bytedst, const char *bytesrc, + size_t bytelen) noexcept { + for (size_t j = 0; j < bytelen; ++j) { + std::atomic_ref(bytedst[j]) + .store(bytesrc[j], std::memory_order_relaxed); + } + }; + + // Handle unaligned start + size_t offset = reinterpret_cast(dst) % alignment; + if (offset) { + size_t to_align = std::min(len, alignment - offset); + bbb_memcpy_atomic_write(dst, src, to_align); + dst += to_align; + src += to_align; + len -= to_align; + } + + // Process aligned 64-bit chunks + while (len >= alignment) { + auto *dst_aligned = reinterpret_cast(dst); + uint64_t src_val; + std::memcpy(&src_val, src, sizeof(uint64_t)); // Non-atomic read from src + std::atomic_ref(*dst_aligned) + .store(src_val, std::memory_order_relaxed); + dst += alignment; + src += alignment; + len -= alignment; + } + + // Handle remaining bytes + if (len) { + bbb_memcpy_atomic_write(dst, src, len); + } +} +} // namespace scalar +} // namespace simdutf +#endif // SIMDUTF_ATOMIC_REF +#endif // SIMDUTF_ATOMIC_UTIL_H +/* end file include/simdutf/scalar/atomic_util.h */ +/* begin file include/simdutf/scalar/latin1.h */ +#ifndef SIMDUTF_LATIN1_H +#define SIMDUTF_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1 { + +simdutf_really_inline size_t utf8_length_from_latin1(const char *buf, + size_t len) { + const uint8_t *c = reinterpret_cast(buf); + size_t answer = 0; + for (size_t i = 0; i < len; i++) { + if ((c[i] >> 7)) { + answer++; + } + } + return answer + len; +} + +} // namespace latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/latin1.h */ +/* begin file include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h */ +#ifndef SIMDUTF_LATIN1_TO_UTF16_H +#define SIMDUTF_LATIN1_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1_to_utf16 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + + while (pos < len) { + uint16_t word = + uint8_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point + *utf16_output++ = + char16_t(match_system(big_endian) ? word : u16_swap_bytes(word)); + pos++; + } + + return utf16_output - start; +} + +template +inline result convert_with_errors(const char *buf, size_t len, + char16_t *utf16_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t *start{utf16_output}; + + while (pos < len) { + uint16_t word = + uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point + *utf16_output++ = + char16_t(match_system(big_endian) ? word : u16_swap_bytes(word)); + pos++; + } + + return result(error_code::SUCCESS, utf16_output - start); +} + +} // namespace latin1_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h */ +/* begin file include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h */ +#ifndef SIMDUTF_LATIN1_TO_UTF32_H +#define SIMDUTF_LATIN1_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1_to_utf32 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + char32_t *utf32_output) { + char32_t *start{utf32_output}; + for (size_t i = 0; i < len; i++) { + *utf32_output++ = uint8_t(data[i]); + } + return utf32_output - start; +} + +} // namespace latin1_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h */ +/* begin file include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h */ +#ifndef SIMDUTF_LATIN1_TO_UTF8_H +#define SIMDUTF_LATIN1_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1_to_utf8 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_byte_like && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + OutputPtr utf8_output) { + // const unsigned char *data = reinterpret_cast(buf); + size_t pos = 0; + size_t utf8_pos = 0; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + utf8_output[utf8_pos++] = char(data[pos]); + pos++; + } + continue; + } + } // if (pos + 16 <= len) + } // !consteval scope + + unsigned char byte = data[pos]; + if ((byte & 0x80) == 0) { // if ASCII + // will generate one UTF-8 bytes + utf8_output[utf8_pos++] = char(byte); + pos++; + } else { + // will generate two UTF-8 bytes + utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); + utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); + pos++; + } + } // while + return utf8_pos; +} + +simdutf_really_inline size_t convert(const char *buf, size_t len, + char *utf8_output) { + return convert(reinterpret_cast(buf), len, + utf8_output); +} + +inline size_t convert_safe(const char *buf, size_t len, char *utf8_output, + size_t utf8_len) { + const unsigned char *data = reinterpret_cast(buf); + size_t pos = 0; + size_t skip_pos = 0; + size_t utf8_pos = 0; + while (pos < len && utf8_pos < utf8_len) { + // try to convert the next block of 16 ASCII bytes + if (pos >= skip_pos && pos + 16 <= len && + utf8_pos + 16 <= utf8_len) { // if it is safe to read 16 more bytes, + // check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + ::memcpy(utf8_output + utf8_pos, buf + pos, 16); + utf8_pos += 16; + pos += 16; + } else { + // At least one of the next 16 bytes are not ASCII, we will process them + // one by one + skip_pos = pos + 16; + } + } else { + const auto byte = data[pos]; + if ((byte & 0x80) == 0) { // if ASCII + // will generate one UTF-8 bytes + utf8_output[utf8_pos++] = char(byte); + pos++; + } else if (utf8_pos + 2 <= utf8_len) { + // will generate two UTF-8 bytes + utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); + utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); + pos++; + } else { + break; + } + } + } + return utf8_pos; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_byte_like && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert_safe_constexpr(InputPtr data, size_t len, + OutputPtr utf8_output, + size_t utf8_len) { + size_t pos = 0; + size_t utf8_pos = 0; + while (pos < len && utf8_pos < utf8_len) { + const unsigned char byte = data[pos]; + if ((byte & 0x80) == 0) { // if ASCII + // will generate one UTF-8 bytes + utf8_output[utf8_pos++] = char(byte); + pos++; + } else if (utf8_pos + 2 <= utf8_len) { + // will generate two UTF-8 bytes + utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); + utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); + pos++; + } else { + break; + } + } + return utf8_pos; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 simdutf_warn_unused size_t +utf8_length_from_latin1(InputPtr input, size_t length) noexcept { + size_t answer = length; + size_t i = 0; + +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + auto pop = [](uint64_t v) { + return (size_t)(((v >> 7) & UINT64_C(0x0101010101010101)) * + UINT64_C(0x0101010101010101) >> + 56); + }; + for (; i + 32 <= length; i += 32) { + uint64_t v; + memcpy(&v, input + i, 8); + answer += pop(v); + memcpy(&v, input + i + 8, sizeof(v)); + answer += pop(v); + memcpy(&v, input + i + 16, sizeof(v)); + answer += pop(v); + memcpy(&v, input + i + 24, sizeof(v)); + answer += pop(v); + } + for (; i + 8 <= length; i += 8) { + uint64_t v; + memcpy(&v, input + i, sizeof(v)); + answer += pop(v); + } + } // !consteval scope + for (; i + 1 <= length; i += 1) { + answer += static_cast(input[i]) >> 7; + } + return answer; +} + +} // namespace latin1_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h */ +/* begin file include/simdutf/scalar/utf16.h */ +#ifndef SIMDUTF_UTF16_H +#define SIMDUTF_UTF16_H + +namespace simdutf { +namespace scalar { +namespace utf16 { + +template +simdutf_warn_unused simdutf_constexpr23 bool +validate_as_ascii(const char16_t *data, size_t len) noexcept { + for (size_t pos = 0; pos < len; pos++) { + char16_t word = scalar::utf16::swap_if_needed(data[pos]); + if (word >= 0x80) { + return false; + } + } + return true; +} + +template +inline simdutf_warn_unused simdutf_constexpr23 bool +validate(const char16_t *data, size_t len) noexcept { + uint64_t pos = 0; + while (pos < len) { + char16_t word = scalar::utf16::swap_if_needed(data[pos]); + if ((word & 0xF800) == 0xD800) { + if (pos + 1 >= len) { + return false; + } + char16_t diff = char16_t(word - 0xD800); + if (diff > 0x3FF) { + return false; + } + char16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + char16_t diff2 = char16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return false; + } + pos += 2; + } else { + pos++; + } + } + return true; +} + +template +inline simdutf_warn_unused simdutf_constexpr23 result +validate_with_errors(const char16_t *data, size_t len) noexcept { + size_t pos = 0; + while (pos < len) { + char16_t word = scalar::utf16::swap_if_needed(data[pos]); + if ((word & 0xF800) == 0xD800) { + if (pos + 1 >= len) { + return result(error_code::SURROGATE, pos); + } + char16_t diff = char16_t(word - 0xD800); + if (diff > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + char16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + char16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + pos += 2; + } else { + pos++; + } + } + return result(error_code::SUCCESS, pos); +} + +template +simdutf_constexpr23 size_t count_code_points(const char16_t *p, size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + char16_t word = scalar::utf16::swap_if_needed(p[i]); + counter += ((word & 0xFC00) != 0xDC00); + } + return counter; +} + +template +simdutf_constexpr23 size_t utf8_length_from_utf16(const char16_t *p, + size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + char16_t word = scalar::utf16::swap_if_needed(p[i]); + counter++; // ASCII + counter += static_cast( + word > + 0x7F); // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes + counter += static_cast((word > 0x7FF && word <= 0xD7FF) || + (word >= 0xE000)); // three-byte + } + return counter; +} + +template +simdutf_constexpr23 size_t utf32_length_from_utf16(const char16_t *p, + size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + char16_t word = scalar::utf16::swap_if_needed(p[i]); + counter += ((word & 0xFC00) != 0xDC00); + } + return counter; +} + +simdutf_really_inline simdutf_constexpr23 void +change_endianness_utf16(const char16_t *input, size_t size, char16_t *output) { + for (size_t i = 0; i < size; i++) { + *output++ = char16_t(input[i] >> 8 | input[i] << 8); + } +} + +template +simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf16(const char16_t *input, size_t length) { + if (length == 0) { + return 0; + } + uint16_t last_word = uint16_t(input[length - 1]); + last_word = scalar::utf16::swap_if_needed(last_word); + length -= ((last_word & 0xFC00) == 0xD800); + return length; +} + +template +simdutf_constexpr bool is_high_surrogate(char16_t c) { + c = scalar::utf16::swap_if_needed(c); + return (0xd800 <= c && c <= 0xdbff); +} + +template +simdutf_constexpr bool is_low_surrogate(char16_t c) { + c = scalar::utf16::swap_if_needed(c); + return (0xdc00 <= c && c <= 0xdfff); +} + +simdutf_really_inline constexpr bool high_surrogate(char16_t c) { + return (0xd800 <= c && c <= 0xdbff); +} + +simdutf_really_inline constexpr bool low_surrogate(char16_t c) { + return (0xdc00 <= c && c <= 0xdfff); +} + +template +simdutf_constexpr23 result +utf8_length_from_utf16_with_replacement(const char16_t *p, size_t len) { + bool any_surrogates = false; + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + if (is_high_surrogate(p[i])) { + any_surrogates = true; + // surrogate pair + if (i + 1 < len && is_low_surrogate(p[i + 1])) { + counter += 4; + i++; // skip low surrogate + } else { + counter += 3; // unpaired high surrogate replaced by U+FFFD + } + continue; + } else if (is_low_surrogate(p[i])) { + any_surrogates = true; + counter += 3; // unpaired low surrogate replaced by U+FFFD + continue; + } + char16_t word = !match_system(big_endian) ? u16_swap_bytes(p[i]) : p[i]; + counter++; // at least 1 byte + counter += + static_cast(word > 0x7F); // non-ASCII is at least 2 bytes + counter += static_cast(word > 0x7FF); // three-byte + } + return {any_surrogates ? error_code::SURROGATE : error_code::SUCCESS, + counter}; +} + +// variable templates are a C++14 extension +template constexpr char16_t replacement() { + return !match_system(big_endian) ? scalar::u16_swap_bytes(0xfffd) : 0xfffd; +} + +template +simdutf_constexpr23 void to_well_formed_utf16(const char16_t *input, size_t len, + char16_t *output) { + const char16_t replacement = utf16::replacement(); + bool high_surrogate_prev = false, high_surrogate, low_surrogate; + size_t i = 0; + for (; i < len; i++) { + char16_t c = input[i]; + high_surrogate = is_high_surrogate(c); + low_surrogate = is_low_surrogate(c); + if (high_surrogate_prev && !low_surrogate) { + output[i - 1] = replacement; + } + + if (!high_surrogate_prev && low_surrogate) { + output[i] = replacement; + } else { + output[i] = input[i]; + } + high_surrogate_prev = high_surrogate; + } + + /* string may not end with high surrogate */ + if (high_surrogate_prev) { + output[i - 1] = replacement; + } +} + +} // namespace utf16 +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16.h */ +/* begin file include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h */ +#ifndef SIMDUTF_UTF16_TO_LATIN1_H +#define SIMDUTF_UTF16_TO_LATIN1_H + +#include // for std::memcpy + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_latin1 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf16 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + OutputPtr latin_output) { + if (len == 0) { + return 0; + } + size_t pos = 0; + const auto latin_output_start = latin_output; + uint16_t word = 0; + uint16_t too_large = 0; + + while (pos < len) { + word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + too_large |= word; + *latin_output++ = char(word & 0xFF); + pos++; + } + if ((too_large & 0xFF00) != 0) { + return 0; + } + + return latin_output - latin_output_start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf16 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len, + OutputPtr latin_output) { + if (len == 0) { + return result(error_code::SUCCESS, 0); + } + size_t pos = 0; + auto start = latin_output; + uint16_t word; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that + // they are Latin1 + uint64_t v1, v2, v3, v4; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + ::memcpy(&v2, data + pos + 4, sizeof(uint64_t)); + ::memcpy(&v3, data + pos + 8, sizeof(uint64_t)); + ::memcpy(&v4, data + pos + 12, sizeof(uint64_t)); + + if simdutf_constexpr (!match_system(big_endian)) { + v1 = (v1 >> 8) | (v1 << (64 - 8)); + } + if simdutf_constexpr (!match_system(big_endian)) { + v2 = (v2 >> 8) | (v2 << (64 - 8)); + } + if simdutf_constexpr (!match_system(big_endian)) { + v3 = (v3 >> 8) | (v3 << (64 - 8)); + } + if simdutf_constexpr (!match_system(big_endian)) { + v4 = (v4 >> 8) | (v4 << (64 - 8)); + } + + if (((v1 | v2 | v3 | v4) & 0xFF00FF00FF00FF00) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = !match_system(big_endian) + ? char(u16_swap_bytes(data[pos])) + : char(data[pos]); + pos++; + } + continue; + } + } + } + + word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF00) == 0) { + *latin_output++ = char(word & 0xFF); + pos++; + } else { + return result(error_code::TOO_LARGE, pos); + } + } + return result(error_code::SUCCESS, latin_output - start); +} + +} // namespace utf16_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h */ +/* begin file include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ +#ifndef SIMDUTF_VALID_UTF16_TO_LATIN1_H +#define SIMDUTF_VALID_UTF16_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_latin1 { + +template +simdutf_constexpr23 inline size_t +convert_valid_impl(InputIterator data, size_t len, + OutputIterator latin_output) { + static_assert( + std::is_same::type, uint16_t>::value, + "must decay to uint16_t"); + size_t pos = 0; + const auto start = latin_output; + uint16_t word = 0; + + while (pos < len) { + word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + *latin_output++ = char(word); + pos++; + } + + return latin_output - start; +} + +template +simdutf_really_inline size_t convert_valid(const char16_t *buf, size_t len, + char *latin_output) { + return convert_valid_impl(reinterpret_cast(buf), + len, latin_output); +} +} // namespace utf16_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ +/* begin file include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h */ +#ifndef SIMDUTF_UTF16_TO_UTF32_H +#define SIMDUTF_UTF16_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf32 { + +template +simdutf_constexpr23 size_t convert(const char16_t *data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair, extend 16-bit word to 32-bit word + *utf32_output++ = char32_t(word); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return 0; + } + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return 0; + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + pos += 2; + } + } + return utf32_output - start; +} + +template +simdutf_constexpr23 result convert_with_errors(const char16_t *data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair, extend 16-bit word to 32-bit word + *utf32_output++ = char32_t(word); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + if (pos + 1 >= len) { + return result(error_code::SURROGATE, pos); + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + pos += 2; + } + } + return result(error_code::SUCCESS, utf32_output - start); +} + +} // namespace utf16_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h */ +/* begin file include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ +#ifndef SIMDUTF_VALID_UTF16_TO_UTF32_H +#define SIMDUTF_VALID_UTF16_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf32 { + +template +simdutf_constexpr23 size_t convert_valid(const char16_t *data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair, extend 16-bit word to 32-bit word + *utf32_output++ = char32_t(word); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + pos += 2; + } + } + return utf32_output - start; +} + +} // namespace utf16_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ +/* begin file include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h */ +#ifndef SIMDUTF_UTF16_TO_UTF8_H +#define SIMDUTF_UTF16_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf8 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_utf16 +// FIXME constrain output as well +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + OutputPtr utf8_output) { + size_t pos = 0; + const auto start = utf8_output; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 8 bytes + if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if simdutf_constexpr (!match_system(big_endian)) { + v = (v >> 8) | (v << (64 - 8)); + } + if ((v & 0xFF80FF80FF80FF80) == 0) { + size_t final_pos = pos + 4; + while (pos < final_pos) { + *utf8_output++ = !match_system(big_endian) + ? char(u16_swap_bytes(data[pos])) + : char(data[pos]); + pos++; + } + continue; + } + } + } + uint16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xF800) != 0xD800) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // must be a surrogate pair + if (pos + 1 >= len) { + return 0; + } + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return 0; + } + uint16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return 0; + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + pos += 2; + } + } + return utf8_output - start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf16 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 full_result convert_with_errors(InputPtr data, size_t len, + OutputPtr utf8_output, + size_t utf8_len = 0) { + if (check_output && utf8_len == 0) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, 0, 0); + } + + size_t pos = 0; + auto start = utf8_output; + auto end = utf8_output + utf8_len; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 8 bytes + if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if simdutf_constexpr (!match_system(big_endian)) + v = (v >> 8) | (v << (64 - 8)); + if ((v & 0xFF80FF80FF80FF80) == 0) { + size_t final_pos = pos + 4; + while (pos < final_pos) { + if (check_output && size_t(end - utf8_output) < 1) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, + utf8_output - start); + } + *utf8_output++ = !match_system(big_endian) + ? char(u16_swap_bytes(data[pos])) + : char(data[pos]); + pos++; + } + continue; + } + } + } + + uint16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF80) == 0) { + // will generate one UTF-8 bytes + if (check_output && size_t(end - utf8_output) < 1) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, + utf8_output - start); + } + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + if (check_output && size_t(end - utf8_output) < 2) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, + utf8_output - start); + } + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + + } else if ((word & 0xF800) != 0xD800) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + if (check_output && size_t(end - utf8_output) < 3) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, + utf8_output - start); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + + if (check_output && size_t(end - utf8_output) < 4) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, + utf8_output - start); + } + // must be a surrogate pair + if (pos + 1 >= len) { + return full_result(error_code::SURROGATE, pos, utf8_output - start); + } + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return full_result(error_code::SURROGATE, pos, utf8_output - start); + } + uint16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return full_result(error_code::SURROGATE, pos, utf8_output - start); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + pos += 2; + } + } + return full_result(error_code::SUCCESS, pos, utf8_output - start); +} + +template +inline result simple_convert_with_errors(const char16_t *buf, size_t len, + char *utf8_output) { + return convert_with_errors(buf, len, utf8_output, 0); +} + +} // namespace utf16_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h */ +/* begin file include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ +#ifndef SIMDUTF_VALID_UTF16_TO_UTF8_H +#define SIMDUTF_VALID_UTF16_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf8 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf16 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len, + OutputPtr utf8_output) { + size_t pos = 0; + auto start = utf8_output; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 4 ASCII characters + if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if simdutf_constexpr (!match_system(big_endian)) { + v = (v >> 8) | (v << (64 - 8)); + } + if ((v & 0xFF80FF80FF80FF80) == 0) { + size_t final_pos = pos + 4; + while (pos < final_pos) { + *utf8_output++ = !match_system(big_endian) + ? char(u16_swap_bytes(data[pos])) + : char(data[pos]); + pos++; + } + continue; + } + } + } + + uint16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xF800) != 0xD800) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + uint32_t value = (diff << 10) + diff2 + 0x10000; + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + pos += 2; + } + } + return utf8_output - start; +} + +} // namespace utf16_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ +/* begin file include/simdutf/scalar/utf32.h */ +#ifndef SIMDUTF_UTF32_H +#define SIMDUTF_UTF32_H + +namespace simdutf { +namespace scalar { +namespace utf32 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_uint32 +#endif +simdutf_warn_unused simdutf_constexpr23 bool validate(InputPtr data, + size_t len) noexcept { + uint64_t pos = 0; + for (; pos < len; pos++) { + uint32_t word = data[pos]; + if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { + return false; + } + } + return true; +} + +simdutf_warn_unused simdutf_really_inline bool validate(const char32_t *buf, + size_t len) noexcept { + return validate(reinterpret_cast(buf), len); +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_uint32 +#endif +simdutf_warn_unused simdutf_constexpr23 result +validate_with_errors(InputPtr data, size_t len) noexcept { + size_t pos = 0; + for (; pos < len; pos++) { + uint32_t word = data[pos]; + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, pos); + } + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, pos); + } + } + return result(error_code::SUCCESS, pos); +} + +simdutf_warn_unused simdutf_really_inline result +validate_with_errors(const char32_t *buf, size_t len) noexcept { + return validate_with_errors(reinterpret_cast(buf), len); +} + +inline simdutf_constexpr23 size_t utf8_length_from_utf32(const char32_t *p, + size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + // credit: @ttsugriy for the vectorizable approach + counter++; // ASCII + counter += static_cast(p[i] > 0x7F); // two-byte + counter += static_cast(p[i] > 0x7FF); // three-byte + counter += static_cast(p[i] > 0xFFFF); // four-bytes + } + return counter; +} + +inline simdutf_warn_unused simdutf_constexpr23 size_t +utf16_length_from_utf32(const char32_t *p, size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + counter++; // non-surrogate word + counter += static_cast(p[i] > 0xFFFF); // surrogate pair + } + return counter; +} + +} // namespace utf32 +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32.h */ +/* begin file include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h */ +#ifndef SIMDUTF_UTF32_TO_LATIN1_H +#define SIMDUTF_UTF32_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_latin1 { + +inline simdutf_constexpr23 size_t convert(const char32_t *data, size_t len, + char *latin1_output) { + char *start = latin1_output; + uint32_t utf32_char; + size_t pos = 0; + uint32_t too_large = 0; + + while (pos < len) { + utf32_char = (uint32_t)data[pos]; + too_large |= utf32_char; + *latin1_output++ = (char)(utf32_char & 0xFF); + pos++; + } + if ((too_large & 0xFFFFFF00) != 0) { + return 0; + } + return latin1_output - start; +} + +inline simdutf_constexpr23 result convert_with_errors(const char32_t *data, + size_t len, + char *latin1_output) { + char *start{latin1_output}; + size_t pos = 0; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that + // they are Latin1 + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF00FFFFFF00) == 0) { + *latin1_output++ = char(data[pos]); + *latin1_output++ = char(data[pos + 1]); + pos += 2; + continue; + } + } + } + + uint32_t utf32_char = data[pos]; + if ((utf32_char & 0xFFFFFF00) == + 0) { // Check if the character can be represented in Latin-1 + *latin1_output++ = (char)(utf32_char & 0xFF); + pos++; + } else { + return result(error_code::TOO_LARGE, pos); + }; + } + return result(error_code::SUCCESS, latin1_output - start); +} + +} // namespace utf32_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h */ +/* begin file include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ +#ifndef SIMDUTF_VALID_UTF32_TO_LATIN1_H +#define SIMDUTF_VALID_UTF32_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_latin1 { + +template +simdutf_constexpr23 size_t convert_valid(ReadPtr data, size_t len, + WritePtr latin1_output) { + static_assert( + std::is_same::type, uint32_t>::value, + "dereferencing the data pointer must result in a uint32_t"); + auto start = latin1_output; + uint32_t utf32_char; + size_t pos = 0; + + while (pos < len) { + utf32_char = data[pos]; + +#if SIMDUTF_CPLUSPLUS23 + // avoid using the 8 byte at a time optimization in constant evaluation + // mode. memcpy can't be used and replacing it with bitwise or gave worse + // codegen (when not during constant evaluation). + if !consteval { +#endif + if (pos + 2 <= len) { + // if it is safe to read 8 more bytes, check that they are Latin1 + uint64_t v; + std::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF00FFFFFF00) == 0) { + *latin1_output++ = char(data[pos]); + *latin1_output++ = char(data[pos + 1]); + pos += 2; + continue; + } else { + // output can not be represented in latin1 + return 0; + } + } +#if SIMDUTF_CPLUSPLUS23 + } // if ! consteval +#endif + if ((utf32_char & 0xFFFFFF00) == 0) { + *latin1_output++ = char(utf32_char); + } else { + // output can not be represented in latin1 + return 0; + } + pos++; + } + return latin1_output - start; +} + +simdutf_really_inline size_t convert_valid(const char32_t *buf, size_t len, + char *latin1_output) { + return convert_valid(reinterpret_cast(buf), len, + latin1_output); +} + +} // namespace utf32_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ +/* begin file include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h */ +#ifndef SIMDUTF_UTF32_TO_UTF16_H +#define SIMDUTF_UTF32_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf16 { + +template +simdutf_constexpr23 size_t convert(const char32_t *data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + uint32_t word = data[pos]; + if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return 0; + } + // will not generate a surrogate pair + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(uint16_t(word))) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return 0; + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + pos++; + } + return utf16_output - start; +} + +template +simdutf_constexpr23 result convert_with_errors(const char32_t *data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + uint32_t word = data[pos]; + if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, pos); + } + // will not generate a surrogate pair + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(uint16_t(word))) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, pos); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + pos++; + } + return result(error_code::SUCCESS, utf16_output - start); +} + +} // namespace utf32_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h */ +/* begin file include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ +#ifndef SIMDUTF_VALID_UTF32_TO_UTF16_H +#define SIMDUTF_VALID_UTF32_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf16 { + +template +simdutf_constexpr23 size_t convert_valid(const char32_t *data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + uint32_t word = data[pos]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(uint16_t(word))) + : char16_t(word); + pos++; + } else { + // will generate a surrogate pair + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos++; + } + } + return utf16_output - start; +} + +} // namespace utf32_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ +/* begin file include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h */ +#ifndef SIMDUTF_UTF32_TO_UTF8_H +#define SIMDUTF_UTF32_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf8 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf32 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + OutputPtr utf8_output) { + size_t pos = 0; + auto start = utf8_output; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { // try to convert the next block of 2 ASCII characters + if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF80FFFFFF80) == 0) { + *utf8_output++ = char(data[pos]); + *utf8_output++ = char(data[pos + 1]); + pos += 2; + continue; + } + } + } + + uint32_t word = data[pos]; + if ((word & 0xFFFFFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xFFFFF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xFFFF0000) == 0) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + if (word >= 0xD800 && word <= 0xDFFF) { + return 0; + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + if (word > 0x10FFFF) { + return 0; + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } + } + return utf8_output - start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf32 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len, + OutputPtr utf8_output) { + size_t pos = 0; + auto start = utf8_output; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { // try to convert the next block of 2 ASCII characters + if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF80FFFFFF80) == 0) { + *utf8_output++ = char(data[pos]); + *utf8_output++ = char(data[pos + 1]); + pos += 2; + continue; + } + } + } + + uint32_t word = data[pos]; + if ((word & 0xFFFFFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xFFFFF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xFFFF0000) == 0) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, pos); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, pos); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } + } + return result(error_code::SUCCESS, utf8_output - start); +} + +} // namespace utf32_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h */ +/* begin file include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ +#ifndef SIMDUTF_VALID_UTF32_TO_UTF8_H +#define SIMDUTF_VALID_UTF32_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf8 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf32 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len, + OutputPtr utf8_output) { + size_t pos = 0; + auto start = utf8_output; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { // try to convert the next block of 2 ASCII characters + if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF80FFFFFF80) == 0) { + *utf8_output++ = char(data[pos]); + *utf8_output++ = char(data[pos + 1]); + pos += 2; + continue; + } + } + } + + uint32_t word = data[pos]; + if ((word & 0xFFFFFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xFFFFF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xFFFF0000) == 0) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } + } + return utf8_output - start; +} + +} // namespace utf32_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ +/* begin file include/simdutf/scalar/utf8.h */ +#ifndef SIMDUTF_UTF8_H +#define SIMDUTF_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8 { + +// credit: based on code from Google Fuchsia (Apache Licensed) +template +simdutf_constexpr23 simdutf_warn_unused bool validate(BytePtr data, + size_t len) noexcept { + static_assert( + std::is_same::type, uint8_t>::value, + "dereferencing the data pointer must result in a uint8_t"); + uint64_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + uint64_t next_pos; +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { // check if the next 16 bytes are ascii. + next_pos = pos + 16; + if (next_pos <= len) { // if it is safe to read 16 more bytes, check + // that they are ascii + uint64_t v1{}; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2{}; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + } + + unsigned char byte = data[pos]; + + while (byte < 0b10000000) { + if (++pos == len) { + return true; + } + byte = data[pos]; + } + + if ((byte & 0b11100000) == 0b11000000) { + next_pos = pos + 2; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if ((code_point < 0x80) || (0x7ff < code_point)) { + return false; + } + } else if ((byte & 0b11110000) == 0b11100000) { + next_pos = pos + 3; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = (byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point) || + (0xd7ff < code_point && code_point < 0xe000)) { + return false; + } + } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = + (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + return false; + } + } else { + // we may have a continuation + return false; + } + pos = next_pos; + } + return true; +} + +simdutf_really_inline simdutf_warn_unused bool validate(const char *buf, + size_t len) noexcept { + return validate(reinterpret_cast(buf), len); +} + +template +simdutf_constexpr23 simdutf_warn_unused result +validate_with_errors(BytePtr data, size_t len) noexcept { + static_assert( + std::is_same::type, uint8_t>::value, + "dereferencing the data pointer must result in a uint8_t"); + size_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check of the next 16 bytes are ascii. + size_t next_pos = pos + 16; + if (next_pos <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + + while (byte < 0b10000000) { + if (++pos == len) { + return result(error_code::SUCCESS, len); + } + byte = data[pos]; + } + + if ((byte & 0b11100000) == 0b11000000) { + next_pos = pos + 2; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if ((code_point < 0x80) || (0x7ff < code_point)) { + return result(error_code::OVERLONG, pos); + } + } else if ((byte & 0b11110000) == 0b11100000) { + next_pos = pos + 3; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = (byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point)) { + return result(error_code::OVERLONG, pos); + } + if (0xd7ff < code_point && code_point < 0xe000) { + return result(error_code::SURROGATE, pos); + } + } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = + (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff) { + return result(error_code::OVERLONG, pos); + } + if (0x10ffff < code_point) { + return result(error_code::TOO_LARGE, pos); + } + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } else { + return result(error_code::HEADER_BITS, pos); + } + } + pos = next_pos; + } + return result(error_code::SUCCESS, len); +} + +simdutf_really_inline simdutf_warn_unused result +validate_with_errors(const char *buf, size_t len) noexcept { + return validate_with_errors(reinterpret_cast(buf), len); +} + +// Finds the previous leading byte starting backward from buf and validates with +// errors from there Used to pinpoint the location of an error when an invalid +// chunk is detected We assume that the stream starts with a leading byte, and +// to check that it is the case, we ask that you pass a pointer to the start of +// the stream (start). +inline simdutf_warn_unused result rewind_and_validate_with_errors( + const char *start, const char *buf, size_t len) noexcept { + // First check that we start with a leading byte + if ((*start & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, 0); + } + size_t extra_len{0}; + // A leading byte cannot be further than 4 bytes away + for (int i = 0; i < 5; i++) { + unsigned char byte = *buf; + if ((byte & 0b11000000) != 0b10000000) { + break; + } else { + buf--; + extra_len++; + } + } + + result res = validate_with_errors(buf, len + extra_len); + res.count -= extra_len; + return res; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t count_code_points(InputPtr data, size_t len) { + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + // -65 is 0b10111111, anything larger in two-complement's should start a new + // code point. + if (int8_t(data[i]) > -65) { + counter++; + } + } + return counter; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t utf16_length_from_utf8(InputPtr data, size_t len) { + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + if (int8_t(data[i]) > -65) { + counter++; + } + if (uint8_t(data[i]) >= 240) { + counter++; + } + } + return counter; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf8(InputPtr input, size_t length) { + if (length < 3) { + switch (length) { + case 2: + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 2]) >= 0xe0) { + return length - 2; + } // 3- and 4-byte characters with only 2 bytes left + return length; + case 1: + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + return length; + case 0: + return length; + } + } + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 2]) >= 0xe0) { + return length - 2; + } // 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 3]) >= 0xf0) { + return length - 3; + } // 4-byte characters with only 3 bytes left + return length; +} + +} // namespace utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8.h */ +/* begin file include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h */ +#ifndef SIMDUTF_UTF8_TO_LATIN1_H +#define SIMDUTF_UTF8_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_latin1 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_byte_like && + simdutf::detail::indexes_into_byte_like) +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + OutputPtr latin_output) { + size_t pos = 0; + auto start = latin_output; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 + // 1000 1000 .... etc + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = char(data[pos]); + pos++; + } + continue; + } + } + } + + // suppose it is not an all ASCII byte sequence + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *latin_output++ = char(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == + 0b11000000) { // the first three bits indicate: + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } // checks if the next byte is a valid continuation byte in UTF-8. A + // valid continuation byte starts with 10. + // range check - + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | + (data[pos + 1] & + 0b00111111); // assembles the Unicode code point from the two bytes. + // It does this by discarding the leading 110 and 10 + // bits from the two bytes, shifting the remaining bits + // of the first byte, and then combining the results + // with a bitwise OR operation. + if (code_point < 0x80 || 0xFF < code_point) { + return 0; // We only care about the range 129-255 which is Non-ASCII + // latin1 characters. A code_point beneath 0x80 is invalid as + // it is already covered by bytes whose leading bit is zero. + } + *latin_output++ = char(code_point); + pos += 2; + } else { + return 0; + } + } + return latin_output - start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len, + char *latin_output) { + size_t pos = 0; + char *start{latin_output}; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 + // 1000 1000...etc + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = char(data[pos]); + pos++; + } + continue; + } + } + } + // suppose it is not an all ASCII byte sequence + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *latin_output++ = char(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == + 0b11000000) { // the first three bits indicate: + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } // checks if the next byte is a valid continuation byte in UTF-8. A + // valid continuation byte starts with 10. + // range check - + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | + (data[pos + 1] & + 0b00111111); // assembles the Unicode code point from the two bytes. + // It does this by discarding the leading 110 and 10 + // bits from the two bytes, shifting the remaining bits + // of the first byte, and then combining the results + // with a bitwise OR operation. + if (code_point < 0x80) { + return result(error_code::OVERLONG, pos); + } + if (0xFF < code_point) { + return result(error_code::TOO_LARGE, pos); + } // We only care about the range 129-255 which is Non-ASCII latin1 + // characters + *latin_output++ = char(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + return result(error_code::TOO_LARGE, pos); + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + return result(error_code::TOO_LARGE, pos); + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((leading_byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } + + return result(error_code::HEADER_BITS, pos); + } + } + return result(error_code::SUCCESS, latin_output - start); +} + +inline result rewind_and_convert_with_errors(size_t prior_bytes, + const char *buf, size_t len, + char *latin1_output) { + size_t extra_len{0}; + // We potentially need to go back in time and find a leading byte. + // In theory '3' would be sufficient, but sometimes the error can go back + // quite far. + size_t how_far_back = prior_bytes; + // size_t how_far_back = 3; // 3 bytes in the past + current position + // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. + for (size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-static_cast(i)]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if (found_leading_bytes) { + if (i > 0 && byte < 128) { + // If we had to go back and the leading byte is ascii + // then we can stop right away. + return result(error_code::TOO_LONG, 0 - i + 1); + } + buf -= i; + extra_len = i; + break; + } + } + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t is in which is described + // in C Standard as . C Standard Section 4.1.5 defines size_t as an + // unsigned integral type of the result of the sizeof operator + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if (!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is + // continuation] Or we possibly have a stream that does not start with a + // leading byte. + return result(error_code::TOO_LONG, 0 - how_far_back); + } + result res = convert_with_errors(buf, len + extra_len, latin1_output); + if (res.error) { + res.count -= extra_len; + } + return res; +} + +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ +#ifndef SIMDUTF_VALID_UTF8_TO_LATIN1_H +#define SIMDUTF_VALID_UTF8_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_latin1 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len, + char *latin_output) { + + size_t pos = 0; + char *start{latin_output}; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = uint8_t(data[pos]); + pos++; + } + continue; + } + } + } + + // suppose it is not an all ASCII byte sequence + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *latin_output++ = char(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == + 0b11000000) { // the first three bits indicate: + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + break; + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return 0; + } // checks if the next byte is a valid continuation byte in UTF-8. A + // valid continuation byte starts with 10. + // range check - + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | + (uint8_t(data[pos + 1]) & + 0b00111111); // assembles the Unicode code point from the two bytes. + // It does this by discarding the leading 110 and 10 + // bits from the two bytes, shifting the remaining bits + // of the first byte, and then combining the results + // with a bitwise OR operation. + *latin_output++ = char(code_point); + pos += 2; + } else { + // we may have a continuation but we do not do error checking + return 0; + } + } + return latin_output - start; +} + +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ +/* begin file include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h */ +#ifndef SIMDUTF_UTF8_TO_UTF16_H +#define SIMDUTF_UTF8_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf16 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + // try to convert the next block of 16 ASCII bytes + { + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(data[pos])) + : char16_t(data[pos]); + pos++; + } + continue; + } + } + } + + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(leading_byte)) + : char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return 0; + } + if simdutf_constexpr (!match_system(big_endian)) { + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 2 >= len) { + return 0; + } // minimal bound checking + + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return 0; + } + if simdutf_constexpr (!match_system(big_endian)) { + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return 0; + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | + (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + return 0; + } + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + return 0; + } + } + return utf16_output - start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + const char16_t byte = uint8_t(data[pos]); + *utf16_output++ = + !match_system(big_endian) ? u16_swap_bytes(byte) : byte; + pos++; + } + continue; + } + } + } + + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(leading_byte)) + : char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 1 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = (leading_byte & 0b00011111) << 6 | + (uint8_t(data[pos + 1]) & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return result(error_code::OVERLONG, pos); + } + if simdutf_constexpr (!match_system(big_endian)) { + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 2 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (uint8_t(data[pos + 1]) & 0b00111111) << 6 | + (uint8_t(data[pos + 2]) & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point)) { + return result(error_code::OVERLONG, pos); + } + if (0xd7ff < code_point && code_point < 0xe000) { + return result(error_code::SURROGATE, pos); + } + if simdutf_constexpr (!match_system(big_endian)) { + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 3]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (uint8_t(data[pos + 1]) & 0b00111111) << 12 | + (uint8_t(data[pos + 2]) & 0b00111111) << 6 | + (uint8_t(data[pos + 3]) & 0b00111111); + if (code_point <= 0xffff) { + return result(error_code::OVERLONG, pos); + } + if (0x10ffff < code_point) { + return result(error_code::TOO_LARGE, pos); + } + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((leading_byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } else { + return result(error_code::HEADER_BITS, pos); + } + } + } + return result(error_code::SUCCESS, utf16_output - start); +} + +/** + * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and + * we have up to len input bytes left, and we encountered some error. It is + * possible that the error is at 'buf' exactly, but it could also be in the + * previous bytes (up to 3 bytes back). + * + * prior_bytes indicates how many bytes, prior to 'buf' may belong to the + * current memory section and can be safely accessed. We prior_bytes to access + * safely up to three bytes before 'buf'. + * + * The caller is responsible to ensure that len > 0. + * + * If the error is believed to have occurred prior to 'buf', the count value + * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. + */ +template +inline result rewind_and_convert_with_errors(size_t prior_bytes, + const char *buf, size_t len, + char16_t *utf16_output) { + size_t extra_len{0}; + // We potentially need to go back in time and find a leading byte. + // In theory '3' would be sufficient, but sometimes the error can go back + // quite far. + size_t how_far_back = prior_bytes; + // size_t how_far_back = 3; // 3 bytes in the past + current position + // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. + for (size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-static_cast(i)]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if (found_leading_bytes) { + if (i > 0 && byte < 128) { + // If we had to go back and the leading byte is ascii + // then we can stop right away. + return result(error_code::TOO_LONG, 0 - i + 1); + } + buf -= i; + extra_len = i; + break; + } + } + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t is in which is described + // in C Standard as . C Standard Section 4.1.5 defines size_t as an + // unsigned integral type of the result of the sizeof operator + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if (!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is + // continuation] Or we possibly have a stream that does not start with a + // leading byte. + return result(error_code::TOO_LONG, 0 - how_far_back); + } + result res = convert_with_errors(buf, len + extra_len, utf16_output); + if (res.error) { + res.count -= extra_len; + } + return res; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h */ +/* begin file include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ +#ifndef SIMDUTF_VALID_UTF8_TO_UTF16_H +#define SIMDUTF_VALID_UTF8_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf16 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { // try to convert the next block of 8 ASCII bytes + if (pos + 8 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 8; + while (pos < final_pos) { + const char16_t byte = uint8_t(data[pos]); + *utf16_output++ = + !match_system(big_endian) ? u16_swap_bytes(byte) : byte; + pos++; + } + continue; + } + } + } + + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(leading_byte)) + : char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 1 >= len) { + break; + } // minimal bound checking + uint16_t code_point = uint16_t(((leading_byte & 0b00011111) << 6) | + (uint8_t(data[pos + 1]) & 0b00111111)); + if simdutf_constexpr (!match_system(big_endian)) { + code_point = u16_swap_bytes(uint16_t(code_point)); + } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 2 >= len) { + break; + } // minimal bound checking + uint16_t code_point = + uint16_t(((leading_byte & 0b00001111) << 12) | + ((uint8_t(data[pos + 1]) & 0b00111111) << 6) | + (uint8_t(data[pos + 2]) & 0b00111111)); + if simdutf_constexpr (!match_system(big_endian)) { + code_point = u16_swap_bytes(uint16_t(code_point)); + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + break; + } // minimal bound checking + uint32_t code_point = ((leading_byte & 0b00000111) << 18) | + ((uint8_t(data[pos + 1]) & 0b00111111) << 12) | + ((uint8_t(data[pos + 2]) & 0b00111111) << 6) | + (uint8_t(data[pos + 3]) & 0b00111111); + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + // we may have a continuation but we do not do error checking + return 0; + } + } + return utf16_output - start; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* begin file include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h */ +#ifndef SIMDUTF_UTF8_TO_UTF32_H +#define SIMDUTF_UTF8_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf32 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *utf32_output++ = uint8_t(data[pos]); + pos++; + } + continue; + } + } + } + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf32_output++ = char32_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = (leading_byte & 0b00011111) << 6 | + (uint8_t(data[pos + 1]) & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return 0; + } + *utf32_output++ = char32_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + if (pos + 2 >= len) { + return 0; + } // minimal bound checking + + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return 0; + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (uint8_t(data[pos + 1]) & 0b00111111) << 6 | + (uint8_t(data[pos + 2]) & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return 0; + } + *utf32_output++ = char32_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + return 0; + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return 0; + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return 0; + } + if ((uint8_t(data[pos + 3]) & 0b11000000) != 0b10000000) { + return 0; + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (uint8_t(data[pos + 1]) & 0b00111111) << 12 | + (uint8_t(data[pos + 2]) & 0b00111111) << 6 | + (uint8_t(data[pos + 3]) & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + return 0; + } + *utf32_output++ = char32_t(code_point); + pos += 4; + } else { + return 0; + } + } + return utf32_output - start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *utf32_output++ = uint8_t(data[pos]); + pos++; + } + continue; + } + } + } + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf32_output++ = char32_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = (leading_byte & 0b00011111) << 6 | + (uint8_t(data[pos + 1]) & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return result(error_code::OVERLONG, pos); + } + *utf32_output++ = char32_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + if (pos + 2 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (uint8_t(data[pos + 1]) & 0b00111111) << 6 | + (uint8_t(data[pos + 2]) & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point) { + return result(error_code::OVERLONG, pos); + } + if (0xd7ff < code_point && code_point < 0xe000) { + return result(error_code::SURROGATE, pos); + } + *utf32_output++ = char32_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 3]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (uint8_t(data[pos + 1]) & 0b00111111) << 12 | + (uint8_t(data[pos + 2]) & 0b00111111) << 6 | + (uint8_t(data[pos + 3]) & 0b00111111); + if (code_point <= 0xffff) { + return result(error_code::OVERLONG, pos); + } + if (0x10ffff < code_point) { + return result(error_code::TOO_LARGE, pos); + } + *utf32_output++ = char32_t(code_point); + pos += 4; + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((leading_byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } else { + return result(error_code::HEADER_BITS, pos); + } + } + } + return result(error_code::SUCCESS, utf32_output - start); +} + +/** + * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and + * we have up to len input bytes left, and we encountered some error. It is + * possible that the error is at 'buf' exactly, but it could also be in the + * previous bytes location (up to 3 bytes back). + * + * prior_bytes indicates how many bytes, prior to 'buf' may belong to the + * current memory section and can be safely accessed. We prior_bytes to access + * safely up to three bytes before 'buf'. + * + * The caller is responsible to ensure that len > 0. + * + * If the error is believed to have occurred prior to 'buf', the count value + * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. + */ +inline result rewind_and_convert_with_errors(size_t prior_bytes, + const char *buf, size_t len, + char32_t *utf32_output) { + size_t extra_len{0}; + // We potentially need to go back in time and find a leading byte. + size_t how_far_back = 3; // 3 bytes in the past + current position + if (how_far_back > prior_bytes) { + how_far_back = prior_bytes; + } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. + for (size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-static_cast(i)]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if (found_leading_bytes) { + if (i > 0 && byte < 128) { + // If we had to go back and the leading byte is ascii + // then we can stop right away. + return result(error_code::TOO_LONG, 0 - i + 1); + } + buf -= i; + extra_len = i; + break; + } + } + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t is in which is described + // in C Standard as . C Standard Section 4.1.5 defines size_t as an + // unsigned integral type of the result of the sizeof operator + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if (!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is + // continuation] Or we possibly have a stream that does not start with a + // leading byte. + return result(error_code::TOO_LONG, 0 - how_far_back); + } + + result res = convert_with_errors(buf, len + extra_len, utf32_output); + if (res.error) { + res.count -= extra_len; + } + return res; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h */ +/* begin file include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ +#ifndef SIMDUTF_VALID_UTF8_TO_UTF32_H +#define SIMDUTF_VALID_UTF8_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf32 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 8 ASCII bytes + if (pos + 8 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 8; + while (pos < final_pos) { + *utf32_output++ = uint8_t(data[pos]); + pos++; + } + continue; + } + } + } + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf32_output++ = char32_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + break; + } // minimal bound checking + *utf32_output++ = char32_t(((leading_byte & 0b00011111) << 6) | + (uint8_t(data[pos + 1]) & 0b00111111)); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + if (pos + 2 >= len) { + break; + } // minimal bound checking + *utf32_output++ = char32_t(((leading_byte & 0b00001111) << 12) | + ((uint8_t(data[pos + 1]) & 0b00111111) << 6) | + (uint8_t(data[pos + 2]) & 0b00111111)); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + break; + } // minimal bound checking + uint32_t code_word = ((leading_byte & 0b00000111) << 18) | + ((uint8_t(data[pos + 1]) & 0b00111111) << 12) | + ((uint8_t(data[pos + 2]) & 0b00111111) << 6) | + (uint8_t(data[pos + 3]) & 0b00111111); + *utf32_output++ = char32_t(code_word); + pos += 4; + } else { + // we may have a continuation but we do not do error checking + return 0; + } + } + return utf32_output - start; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ + +namespace simdutf { + +constexpr size_t default_line_length = + 76; ///< default line length for base64 encoding with lines + +#if SIMDUTF_FEATURE_DETECT_ENCODING +/** + * Autodetect the encoding of the input, a single encoding is recommended. + * E.g., the function might return simdutf::encoding_type::UTF8, + * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or + * simdutf::encoding_type::UTF32_LE. + * + * @param input the string to analyze. + * @param length the length of the string in bytes. + * @return the detected encoding type + */ +simdutf_warn_unused simdutf::encoding_type +autodetect_encoding(const char *input, size_t length) noexcept; +simdutf_really_inline simdutf_warn_unused simdutf::encoding_type +autodetect_encoding(const uint8_t *input, size_t length) noexcept { + return autodetect_encoding(reinterpret_cast(input), length); +} + #if SIMDUTF_SPAN +/** + * Autodetect the encoding of the input, a single encoding is recommended. + * E.g., the function might return simdutf::encoding_type::UTF8, + * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or + * simdutf::encoding_type::UTF32_LE. + * + * @param input the string to analyze. can be a anything span-like that has a + * data() and size() that points to character data: std::string, + * std::string_view, std::vector, std::span etc. + * @return the detected encoding type + */ +simdutf_really_inline simdutf_warn_unused simdutf::encoding_type +autodetect_encoding( + const detail::input_span_of_byte_like auto &input) noexcept { + return autodetect_encoding(reinterpret_cast(input.data()), + input.size()); +} + #endif // SIMDUTF_SPAN + +/** + * Autodetect the possible encodings of the input in one pass. + * E.g., if the input might be UTF-16LE or UTF-8, this function returns + * the value (simdutf::encoding_type::UTF8 | simdutf::encoding_type::UTF16_LE). + * + * Overridden by each implementation. + * + * @param input the string to analyze. + * @param length the length of the string in bytes. + * @return the detected encoding type + */ +simdutf_warn_unused int detect_encodings(const char *input, + size_t length) noexcept; +simdutf_really_inline simdutf_warn_unused int +detect_encodings(const uint8_t *input, size_t length) noexcept { + return detect_encodings(reinterpret_cast(input), length); +} + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused int +detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept { + return detect_encodings(reinterpret_cast(input.data()), + input.size()); +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +/** + * Validate the UTF-8 string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf8_with_errors. + * + * Overridden by each implementation. + * + * @param buf the UTF-8 string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. + */ +simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_constexpr23 simdutf_really_inline simdutf_warn_unused bool +validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::validate( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_utf8(reinterpret_cast(input.data()), + input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +/** + * Validate the UTF-8 string and stop on error. + * + * Overridden by each implementation. + * + * @param buf the UTF-8 string to validate. + * @param len the length of the string in bytes. + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated if + * successful. + */ +simdutf_warn_unused result validate_utf8_with_errors(const char *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result +validate_utf8_with_errors( + const detail::input_span_of_byte_like auto &input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::validate_with_errors( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_utf8_with_errors( + reinterpret_cast(input.data()), input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +/** + * Validate the ASCII string. + * + * Overridden by each implementation. + * + * @param buf the ASCII string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid ASCII. + */ +simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool +validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::ascii::validate( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_ascii(reinterpret_cast(input.data()), + input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** * Validate the ASCII string and stop on error. It might be faster than * validate_utf8 when an error is expected to occur early. * @@ -1358,10 +5124,19 @@ validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept { simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result validate_ascii_with_errors( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +validate_ascii_with_errors( const detail::input_span_of_byte_like auto &input) noexcept { - return validate_ascii_with_errors( - reinterpret_cast(input.data()), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::ascii::validate_with_errors( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_ascii_with_errors( + reinterpret_cast(input.data()), input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_ASCII @@ -1381,9 +5156,17 @@ simdutf_really_inline simdutf_warn_unused result validate_ascii_with_errors( simdutf_warn_unused bool validate_utf16_as_ascii(const char16_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool validate_utf16_as_ascii(std::span input) noexcept { - return validate_utf16_as_ascii(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_as_ascii(input.data(), + input.size()); + } else + #endif + { + return validate_utf16_as_ascii(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN @@ -1401,9 +5184,17 @@ validate_utf16_as_ascii(std::span input) noexcept { simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool validate_utf16be_as_ascii(std::span input) noexcept { - return validate_utf16be_as_ascii(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_as_ascii(input.data(), + input.size()); + } else + #endif + { + return validate_utf16be_as_ascii(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN @@ -1421,9 +5212,17 @@ validate_utf16be_as_ascii(std::span input) noexcept { simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool validate_utf16le_as_ascii(std::span input) noexcept { - return validate_utf16le_as_ascii(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_as_ascii(input.data(), + input.size()); + } else + #endif + { + return validate_utf16le_as_ascii(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII @@ -1446,9 +5245,17 @@ validate_utf16le_as_ascii(std::span input) noexcept { simdutf_warn_unused bool validate_utf16(const char16_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool validate_utf16(std::span input) noexcept { - return validate_utf16(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate(input.data(), + input.size()); + } else + #endif + { + return validate_utf16(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 @@ -1471,9 +5278,17 @@ validate_utf16(std::span input) noexcept { simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused bool validate_utf16le(std::span input) noexcept { - return validate_utf16le(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate(input.data(), + input.size()); + } else + #endif + { + return validate_utf16le(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING @@ -1496,9 +5311,16 @@ validate_utf16le(std::span input) noexcept { simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool validate_utf16be(std::span input) noexcept { - return validate_utf16be(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate(input.data(), input.size()); + } else + #endif + { + return validate_utf16be(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN @@ -1522,9 +5344,17 @@ validate_utf16be(std::span input) noexcept { simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result validate_utf16_with_errors(std::span input) noexcept { - return validate_utf16_with_errors(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_with_errors( + input.data(), input.size()); + } else + #endif + { + return validate_utf16_with_errors(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN @@ -1547,9 +5377,17 @@ validate_utf16_with_errors(std::span input) noexcept { simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result validate_utf16le_with_errors(std::span input) noexcept { - return validate_utf16le_with_errors(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_with_errors( + input.data(), input.size()); + } else + #endif + { + return validate_utf16le_with_errors(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN @@ -1572,9 +5410,17 @@ validate_utf16le_with_errors(std::span input) noexcept { simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result validate_utf16be_with_errors(std::span input) noexcept { - return validate_utf16be_with_errors(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_with_errors(input.data(), + input.size()); + } else + #endif + { + return validate_utf16be_with_errors(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN @@ -1593,10 +5439,18 @@ validate_utf16be_with_errors(std::span input) noexcept { void to_well_formed_utf16le(const char16_t *input, size_t len, char16_t *output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline void +simdutf_really_inline simdutf_constexpr23 void to_well_formed_utf16le(std::span input, std::span output) noexcept { - to_well_formed_utf16le(input.data(), input.size(), output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + scalar::utf16::to_well_formed_utf16( + input.data(), input.size(), output.data()); + } else + #endif + { + to_well_formed_utf16le(input.data(), input.size(), output.data()); + } } #endif // SIMDUTF_SPAN @@ -1615,10 +5469,18 @@ to_well_formed_utf16le(std::span input, void to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline void +simdutf_really_inline simdutf_constexpr23 void to_well_formed_utf16be(std::span input, std::span output) noexcept { - to_well_formed_utf16be(input.data(), input.size(), output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + scalar::utf16::to_well_formed_utf16( + input.data(), input.size(), output.data()); + } else + #endif + { + to_well_formed_utf16be(input.data(), input.size(), output.data()); + } } #endif // SIMDUTF_SPAN @@ -1637,10 +5499,18 @@ to_well_formed_utf16be(std::span input, void to_well_formed_utf16(const char16_t *input, size_t len, char16_t *output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline void +simdutf_really_inline simdutf_constexpr23 void to_well_formed_utf16(std::span input, std::span output) noexcept { - to_well_formed_utf16(input.data(), input.size(), output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + scalar::utf16::to_well_formed_utf16( + input.data(), input.size(), output.data()); + } else + #endif + { + to_well_formed_utf16(input.data(), input.size(), output.data()); + } } #endif // SIMDUTF_SPAN @@ -1664,9 +5534,17 @@ to_well_formed_utf16(std::span input, simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool validate_utf32(std::span input) noexcept { - return validate_utf32(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32::validate( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_utf32(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING @@ -1691,9 +5569,17 @@ validate_utf32(std::span input) noexcept { simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result validate_utf32_with_errors(std::span input) noexcept { - return validate_utf32_with_errors(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32::validate_with_errors( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_utf32_with_errors(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF32 @@ -1713,12 +5599,23 @@ simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_latin1_to_utf8( const detail::input_span_of_byte_like auto &latin1_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_latin1_to_utf8( - reinterpret_cast(latin1_input.data()), latin1_input.size(), - utf8_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf8::convert( + detail::constexpr_cast_ptr(latin1_input.data()), + latin1_input.size(), + detail::constexpr_cast_writeptr(utf8_output.data())); + } else + #endif + { + return convert_latin1_to_utf8( + reinterpret_cast(latin1_input.data()), + latin1_input.size(), reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN @@ -1739,18 +5636,27 @@ simdutf_warn_unused size_t convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output, size_t utf8_len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8_safe( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_latin1_to_utf8_safe( const detail::input_span_of_byte_like auto &input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - // implementation note: outputspan is a forwarding ref to avoid copying and - // allow both lvalues and rvalues. std::span can be copied without problems, - // but std::vector should not, and this function should accept both. it will - // allow using an owning rvalue ref (example: passing a temporary std::string) - // as output, but the user will quickly find out that he has no way of getting - // the data out of the object in that case. - return convert_latin1_to_utf8_safe( - input.data(), input.size(), reinterpret_cast(utf8_output.data()), - utf8_output.size()); + // implementation note: outputspan is a forwarding ref to avoid copying + // and allow both lvalues and rvalues. std::span can be copied without + // problems, but std::vector should not, and this function should accept + // both. it will allow using an owning rvalue ref (example: passing a + // temporary std::string) as output, but the user will quickly find out + // that he has no way of getting the data out of the object in that case. + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf8::convert_safe_constexpr( + input.data(), input.size(), utf8_output.data(), utf8_output.size()); + } else + #endif + { + return convert_latin1_to_utf8_safe( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(utf8_output.data()), utf8_output.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 @@ -1769,12 +5675,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8_safe( simdutf_warn_unused size_t convert_latin1_to_utf16le( const char *input, size_t length, char16_t *utf16_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16le( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_latin1_to_utf16le( const detail::input_span_of_byte_like auto &latin1_input, std::span utf16_output) noexcept { - return convert_latin1_to_utf16le( - reinterpret_cast(latin1_input.data()), latin1_input.size(), - utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf16::convert( + latin1_input.data(), latin1_input.size(), utf16_output.data()); + } else + #endif + { + return convert_latin1_to_utf16le( + reinterpret_cast(latin1_input.data()), + latin1_input.size(), utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -1791,11 +5706,20 @@ simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16le( simdutf_warn_unused size_t convert_latin1_to_utf16be( const char *input, size_t length, char16_t *utf16_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input, std::span output) noexcept { - return convert_latin1_to_utf16be(reinterpret_cast(input.data()), - input.size(), output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf16::convert( + input.data(), input.size(), output.data()); + } else + #endif + { + return convert_latin1_to_utf16be( + reinterpret_cast(input.data()), input.size(), + output.data()); + } } #endif // SIMDUTF_SPAN /** @@ -1806,7 +5730,10 @@ convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input, * @return the length of the string in Latin1 code units (char) required to * encode the UTF-16 string as Latin1 */ -simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +latin1_length_from_utf16(size_t length) noexcept { + return length; +} /** * Compute the number of code units that this Latin1 string would require in @@ -1816,7 +5743,10 @@ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; * @return the length of the string in 2-byte code units (char16_t) required to * encode the Latin1 string as UTF-16 */ -simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept; +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf16_length_from_latin1(size_t length) noexcept { + return length; +} #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 @@ -1833,12 +5763,21 @@ simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept; simdutf_warn_unused size_t convert_latin1_to_utf32( const char *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf32( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_latin1_to_utf32( const detail::input_span_of_byte_like auto &latin1_input, std::span utf32_output) noexcept { - return convert_latin1_to_utf32( - reinterpret_cast(latin1_input.data()), latin1_input.size(), - utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf32::convert( + latin1_input.data(), latin1_input.size(), utf32_output.data()); + } else + #endif + { + return convert_latin1_to_utf32( + reinterpret_cast(latin1_input.data()), + latin1_input.size(), utf32_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 @@ -1860,12 +5799,21 @@ simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_latin1( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf8_to_latin1( const detail::input_span_of_byte_like auto &input, detail::output_span_of_byte_like auto &&output) noexcept { - return convert_utf8_to_latin1(reinterpret_cast(input.data()), - input.size(), - reinterpret_cast(output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_latin1::convert(input.data(), input.size(), + output.data()); + } else + #endif + { + return convert_utf8_to_latin1(reinterpret_cast(input.data()), + input.size(), + reinterpret_cast(output.data())); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 @@ -1887,11 +5835,19 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_latin1( simdutf_warn_unused size_t convert_utf8_to_utf16( const char *input, size_t length, char16_t *utf16_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input, std::span output) noexcept { - return convert_utf8_to_utf16(reinterpret_cast(input.data()), - input.size(), output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert( + input.data(), input.size(), output.data()); + } else + #endif + { + return convert_utf8_to_utf16(reinterpret_cast(input.data()), + input.size(), output.data()); + } } #endif // SIMDUTF_SPAN @@ -1915,11 +5871,19 @@ convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input, simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( std::span valid_utf16_input) noexcept { - return utf8_length_from_utf16le_with_replacement(valid_utf16_input.data(), - valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::LITTLE>(valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16le_with_replacement(valid_utf16_input.data(), + valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN @@ -1943,11 +5907,19 @@ utf8_length_from_utf16le_with_replacement( simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result utf8_length_from_utf16be_with_replacement( std::span valid_utf16_input) noexcept { - return utf8_length_from_utf16be_with_replacement(valid_utf16_input.data(), - valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::BIG>(valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16be_with_replacement(valid_utf16_input.data(), + valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN @@ -1965,11 +5937,19 @@ utf8_length_from_utf16be_with_replacement( simdutf_warn_unused size_t convert_latin1_to_utf16( const char *input, size_t length, char16_t *utf16_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input, std::span output) noexcept { - return convert_latin1_to_utf16(reinterpret_cast(input.data()), - input.size(), output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf16::convert( + input.data(), input.size(), output.data()); + } else + #endif + { + return convert_latin1_to_utf16(reinterpret_cast(input.data()), + input.size(), output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 @@ -1990,12 +5970,20 @@ convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input, simdutf_warn_unused size_t convert_utf8_to_utf16le( const char *input, size_t length, char16_t *utf16_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input, std::span utf16_output) noexcept { - return convert_utf8_to_utf16le( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert( + utf8_input.data(), utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf8_to_utf16le( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -2014,12 +6002,21 @@ convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input, simdutf_warn_unused size_t convert_utf8_to_utf16be( const char *input, size_t length, char16_t *utf16_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input, std::span utf16_output) noexcept { - return convert_utf8_to_utf16be( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf16_output.data()); + + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert( + utf8_input.data(), utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf8_to_utf16be( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 @@ -2044,13 +6041,21 @@ convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input, simdutf_warn_unused result convert_utf8_to_latin1_with_errors( const char *input, size_t length, char *latin1_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf8_to_latin1_with_errors( const detail::input_span_of_byte_like auto &utf8_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf8_to_latin1_with_errors( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_latin1::convert_with_errors( + utf8_input.data(), utf8_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf8_to_latin1_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 @@ -2074,13 +6079,21 @@ convert_utf8_to_latin1_with_errors( simdutf_warn_unused result convert_utf8_to_utf16_with_errors( const char *input, size_t length, char16_t *utf16_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf8_to_utf16_with_errors( const detail::input_span_of_byte_like auto &utf8_input, std::span utf16_output) noexcept { - return convert_utf8_to_utf16_with_errors( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_with_errors( + utf8_input.data(), utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf8_to_utf16_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -2101,13 +6114,21 @@ convert_utf8_to_utf16_with_errors( simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( const char *input, size_t length, char16_t *utf16_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf8_to_utf16le_with_errors( const detail::input_span_of_byte_like auto &utf8_input, std::span utf16_output) noexcept { - return convert_utf8_to_utf16le_with_errors( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_with_errors( + utf8_input.data(), utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf8_to_utf16le_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -2128,13 +6149,21 @@ convert_utf8_to_utf16le_with_errors( simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( const char *input, size_t length, char16_t *utf16_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf8_to_utf16be_with_errors( const detail::input_span_of_byte_like auto &utf8_input, std::span utf16_output) noexcept { - return convert_utf8_to_utf16be_with_errors( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_with_errors( + utf8_input.data(), utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf8_to_utf16be_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 @@ -2155,12 +6184,20 @@ convert_utf8_to_utf16be_with_errors( simdutf_warn_unused size_t convert_utf8_to_utf32( const char *input, size_t length, char32_t *utf32_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input, std::span utf32_output) noexcept { - return convert_utf8_to_utf32( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf32::convert(utf8_input.data(), utf8_input.size(), + utf32_output.data()); + } else + #endif + { + return convert_utf8_to_utf32( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf32_output.data()); + } } #endif // SIMDUTF_SPAN @@ -2181,13 +6218,21 @@ convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input, simdutf_warn_unused result convert_utf8_to_utf32_with_errors( const char *input, size_t length, char32_t *utf32_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf8_to_utf32_with_errors( const detail::input_span_of_byte_like auto &utf8_input, std::span utf32_output) noexcept { - return convert_utf8_to_utf32_with_errors( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf32::convert_with_errors( + utf8_input.data(), utf8_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf8_to_utf32_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf32_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 @@ -2215,12 +6260,21 @@ convert_utf8_to_utf32_with_errors( simdutf_warn_unused size_t convert_valid_utf8_to_latin1( const char *input, size_t length, char *latin1_output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_latin1( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf8_to_latin1( const detail::input_span_of_byte_like auto &valid_utf8_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_valid_utf8_to_latin1( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size(), latin1_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_latin1::convert_valid( + valid_utf8_input.data(), valid_utf8_input.size(), latin1_output.data()); + } else + #endif + { + return convert_valid_utf8_to_latin1( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), latin1_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 @@ -2239,12 +6293,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_latin1( simdutf_warn_unused size_t convert_valid_utf8_to_utf16( const char *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf8_to_utf16( const detail::input_span_of_byte_like auto &valid_utf8_input, std::span utf16_output) noexcept { - return convert_valid_utf8_to_utf16( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size(), utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_valid( + valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_valid_utf8_to_utf16( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -2261,12 +6324,22 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16( simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( const char *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf8_to_utf16le( const detail::input_span_of_byte_like auto &valid_utf8_input, std::span utf16_output) noexcept { - return convert_valid_utf8_to_utf16le( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size(), utf16_output.data()); + + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_valid( + valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_valid_utf8_to_utf16le( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -2283,12 +6356,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( const char *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf8_to_utf16be( const detail::input_span_of_byte_like auto &valid_utf8_input, std::span utf16_output) noexcept { - return convert_valid_utf8_to_utf16be( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size(), utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_valid( + valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_valid_utf8_to_utf16be( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf16_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 @@ -2307,12 +6389,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( simdutf_warn_unused size_t convert_valid_utf8_to_utf32( const char *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf32( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf8_to_utf32( const detail::input_span_of_byte_like auto &valid_utf8_input, std::span utf32_output) noexcept { - return convert_valid_utf8_to_utf32( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size(), utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf32::convert_valid( + valid_utf8_input.data(), valid_utf8_input.size(), utf32_output.data()); + } else + #endif + { + return convert_valid_utf8_to_utf32( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf32_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 @@ -2329,10 +6420,20 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf32( simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_latin1( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf8_length_from_latin1( const detail::input_span_of_byte_like auto &latin1_input) noexcept { - return utf8_length_from_latin1( - reinterpret_cast(latin1_input.data()), latin1_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf8::utf8_length_from_latin1(latin1_input.data(), + latin1_input.size()); + } else + #endif + { + return utf8_length_from_latin1( + reinterpret_cast(latin1_input.data()), + latin1_input.size()); + } } #endif // SIMDUTF_SPAN @@ -2352,11 +6453,20 @@ simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_latin1( simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t latin1_length_from_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +latin1_length_from_utf8( const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { - return latin1_length_from_utf8( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::count_code_points(valid_utf8_input.data(), + valid_utf8_input.size()); + } else + #endif + { + return latin1_length_from_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 @@ -2379,11 +6489,20 @@ simdutf_really_inline simdutf_warn_unused size_t latin1_length_from_utf8( simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t utf16_length_from_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf16_length_from_utf8( const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { - return utf16_length_from_utf8( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::utf16_length_from_utf8(valid_utf8_input.data(), + valid_utf8_input.size()); + } else + #endif + { + return utf16_length_from_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 @@ -2408,11 +6527,21 @@ simdutf_really_inline simdutf_warn_unused size_t utf16_length_from_utf8( simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf32_length_from_utf8( const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { - return utf32_length_from_utf8( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size()); + + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::count_code_points(valid_utf8_input.data(), + valid_utf8_input.size()); + } else + #endif + { + return utf32_length_from_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 @@ -2437,11 +6566,20 @@ simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16_to_utf8( std::span utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2468,18 +6606,34 @@ simdutf_warn_unused size_t convert_utf16_to_utf8_safe(const char16_t *input, char *utf8_output, size_t utf8_len) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8_safe( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16_to_utf8_safe( std::span utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - // implementation note: outputspan is a forwarding ref to avoid copying and - // allow both lvalues and rvalues. std::span can be copied without problems, - // but std::vector should not, and this function should accept both. it will - // allow using an owning rvalue ref (example: passing a temporary std::string) - // as output, but the user will quickly find out that he has no way of getting - // the data out of the object in that case. - return convert_utf16_to_utf8_safe( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data()), utf8_output.size()); + // implementation note: outputspan is a forwarding ref to avoid copying + // and allow both lvalues and rvalues. std::span can be copied without + // problems, but std::vector should not, and this function should accept + // both. it will allow using an owning rvalue ref (example: passing a + // temporary std::string) as output, but the user will quickly find out + // that he has no way of getting the data out of the object in that case. + #if SIMDUTF_CPLUSPLUS23 + if consteval { + const full_result r = + scalar::utf16_to_utf8::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf8_output.data(), + utf8_output.size()); + if (r.error != error_code::SUCCESS && + r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) { + return 0; + } + return r.output_count; + } else + #endif + { + return convert_utf16_to_utf8_safe( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data()), utf8_output.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 @@ -2503,12 +6657,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8_safe( simdutf_warn_unused size_t convert_utf16_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_latin1( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16_to_latin1( std::span utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16_to_latin1( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16_to_latin1( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2531,12 +6694,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_latin1( simdutf_warn_unused size_t convert_utf16le_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_latin1( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16le_to_latin1( std::span utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16le_to_latin1( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16le_to_latin1( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2557,12 +6729,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_latin1( simdutf_warn_unused size_t convert_utf16be_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_latin1( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16be_to_latin1( std::span utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16be_to_latin1( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16be_to_latin1( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 @@ -2586,11 +6767,21 @@ simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16le_to_utf8( std::span utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16le_to_utf8(utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16le_to_utf8( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2612,11 +6803,21 @@ simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16be_to_utf8( std::span utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16be_to_utf8(utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16be_to_utf8( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 @@ -2641,13 +6842,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_utf8( simdutf_warn_unused result convert_utf16_to_latin1_with_errors( const char16_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf16_to_latin1_with_errors( std::span utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16_to_latin1_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_with_errors( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16_to_latin1_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2669,13 +6878,21 @@ convert_utf16_to_latin1_with_errors( simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( const char16_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf16le_to_latin1_with_errors( std::span utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16le_to_latin1_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_with_errors( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16le_to_latin1_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2699,13 +6916,21 @@ convert_utf16le_to_latin1_with_errors( simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( const char16_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf16be_to_latin1_with_errors( std::span utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16be_to_latin1_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_with_errors( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16be_to_latin1_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 @@ -2731,13 +6956,21 @@ convert_utf16be_to_latin1_with_errors( simdutf_warn_unused result convert_utf16_to_utf8_with_errors( const char16_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf16_to_utf8_with_errors( std::span utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16_to_utf8_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16_to_utf8_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2760,13 +6993,21 @@ convert_utf16_to_utf8_with_errors( simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( const char16_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf16le_to_utf8_with_errors( std::span utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16le_to_utf8_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16le_to_utf8_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2789,20 +7030,28 @@ convert_utf16le_to_utf8_with_errors( simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( const char16_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf16be_to_utf8_with_errors( std::span utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16be_to_utf8_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16be_to_utf8_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN /** * Using native endianness, convert valid UTF-16 string into UTF-8 string. * - * This function assumes that the input string is valid UTF-16LE. + * This function assumes that the input string is valid UTF-16. * * This function is not BOM-aware. * @@ -2815,12 +7064,21 @@ convert_utf16be_to_utf8_with_errors( simdutf_warn_unused size_t convert_valid_utf16_to_utf8( const char16_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16_to_utf8( std::span valid_utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_valid_utf16_to_utf8( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_valid_utf16_to_utf8( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 @@ -2848,12 +7106,23 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf8( simdutf_warn_unused size_t convert_valid_utf16_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_latin1( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16_to_latin1( std::span valid_utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_valid_utf16_to_latin1( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_valid_impl( + detail::constexpr_cast_ptr(valid_utf16_input.data()), + valid_utf16_input.size(), + detail::constexpr_cast_writeptr(latin1_output.data())); + } else + #endif + { + return convert_valid_utf16_to_latin1( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2879,13 +7148,23 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_latin1( simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( std::span valid_utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_valid_utf16le_to_latin1( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_valid_impl( + detail::constexpr_cast_ptr(valid_utf16_input.data()), + valid_utf16_input.size(), + detail::constexpr_cast_writeptr(latin1_output.data())); + } else + #endif + { + return convert_valid_utf16le_to_latin1( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2911,13 +7190,23 @@ convert_valid_utf16le_to_latin1( simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( std::span valid_utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_valid_utf16be_to_latin1( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_valid_impl( + detail::constexpr_cast_ptr(valid_utf16_input.data()), + valid_utf16_input.size(), + detail::constexpr_cast_writeptr(latin1_output.data())); + } else + #endif + { + return convert_valid_utf16be_to_latin1( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 @@ -2926,8 +7215,7 @@ convert_valid_utf16be_to_latin1( /** * Convert valid UTF-16LE string into UTF-8 string. * - * This function assumes that the input string is valid UTF-16LE and that it can - * be represented as Latin1. + * This function assumes that the input string is valid UTF-16LE * * This function is not BOM-aware. * @@ -2940,12 +7228,21 @@ convert_valid_utf16be_to_latin1( simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( const char16_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16le_to_utf8( std::span valid_utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_valid_utf16le_to_utf8( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_valid_utf16le_to_utf8( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN @@ -2965,12 +7262,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( const char16_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16be_to_utf8( std::span valid_utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_valid_utf16be_to_utf8( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_valid_utf16be_to_utf8( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 @@ -2994,11 +7300,20 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( simdutf_warn_unused size_t convert_utf16_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_utf16_to_utf32(std::span utf16_input, std::span utf32_output) noexcept { - return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(), - utf32_output.data()); + + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(), + utf32_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3019,11 +7334,19 @@ convert_utf16_to_utf32(std::span utf16_input, simdutf_warn_unused size_t convert_utf16le_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_utf16le_to_utf32(std::span utf16_input, std::span utf32_output) noexcept { - return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(), - utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(), + utf32_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3044,11 +7367,19 @@ convert_utf16le_to_utf32(std::span utf16_input, simdutf_warn_unused size_t convert_utf16be_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_utf16be_to_utf32(std::span utf16_input, std::span utf32_output) noexcept { - return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(), - utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(), + utf32_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3072,11 +7403,19 @@ convert_utf16be_to_utf32(std::span utf16_input, simdutf_warn_unused result convert_utf16_to_utf32_with_errors( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf16_to_utf32_with_errors(std::span utf16_input, std::span utf32_output) noexcept { - return convert_utf16_to_utf32_with_errors( - utf16_input.data(), utf16_input.size(), utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16_to_utf32_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3099,12 +7438,20 @@ convert_utf16_to_utf32_with_errors(std::span utf16_input, simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf16le_to_utf32_with_errors( std::span utf16_input, std::span utf32_output) noexcept { - return convert_utf16le_to_utf32_with_errors( - utf16_input.data(), utf16_input.size(), utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16le_to_utf32_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3127,12 +7474,20 @@ convert_utf16le_to_utf32_with_errors( simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf16be_to_utf32_with_errors( std::span utf16_input, std::span utf32_output) noexcept { - return convert_utf16be_to_utf32_with_errors( - utf16_input.data(), utf16_input.size(), utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16be_to_utf32_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3153,11 +7508,21 @@ convert_utf16be_to_utf32_with_errors( simdutf_warn_unused size_t convert_valid_utf16_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_valid_utf16_to_utf32(std::span valid_utf16_input, std::span utf32_output) noexcept { - return convert_valid_utf16_to_utf32( - valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), + utf32_output.data()); + } else + #endif + { + return convert_valid_utf16_to_utf32(valid_utf16_input.data(), + valid_utf16_input.size(), + utf32_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3177,11 +7542,21 @@ convert_valid_utf16_to_utf32(std::span valid_utf16_input, simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_valid_utf16le_to_utf32(std::span valid_utf16_input, std::span utf32_output) noexcept { - return convert_valid_utf16le_to_utf32( - valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), + utf32_output.data()); + } else + #endif + { + return convert_valid_utf16le_to_utf32(valid_utf16_input.data(), + valid_utf16_input.size(), + utf32_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3201,32 +7576,25 @@ convert_valid_utf16le_to_utf32(std::span valid_utf16_input, simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_valid_utf16be_to_utf32(std::span valid_utf16_input, std::span utf32_output) noexcept { - return convert_valid_utf16be_to_utf32( - valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), + utf32_output.data()); + } else + #endif + { + return convert_valid_utf16be_to_utf32(valid_utf16_input.data(), + valid_utf16_input.size(), + utf32_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/** - * Compute the number of bytes that this UTF-16LE/BE string would require in - * Latin1 format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param length the length of the string in 2-byte code units (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as Latin1 - */ -simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; - -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Using native endianness; Compute the number of bytes that this UTF-16 @@ -3242,10 +7610,18 @@ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t utf8_length_from_utf16(std::span valid_utf16_input) noexcept { - return utf8_length_from_utf16(valid_utf16_input.data(), - valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16(valid_utf16_input.data(), + valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN @@ -3270,11 +7646,19 @@ utf8_length_from_utf16(std::span valid_utf16_input) noexcept { simdutf_warn_unused result utf8_length_from_utf16_with_replacement( const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result utf8_length_from_utf16_with_replacement( std::span valid_utf16_input) noexcept { - return utf8_length_from_utf16_with_replacement(valid_utf16_input.data(), - valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::NATIVE>(valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16_with_replacement(valid_utf16_input.data(), + valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN @@ -3292,10 +7676,18 @@ utf8_length_from_utf16_with_replacement( simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t utf8_length_from_utf16le(std::span valid_utf16_input) noexcept { - return utf8_length_from_utf16le(valid_utf16_input.data(), - valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16le(valid_utf16_input.data(), + valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN @@ -3313,10 +7705,18 @@ utf8_length_from_utf16le(std::span valid_utf16_input) noexcept { simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t utf8_length_from_utf16be(std::span valid_utf16_input) noexcept { - return utf8_length_from_utf16be(valid_utf16_input.data(), - valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16be(valid_utf16_input.data(), + valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 @@ -3339,11 +7739,20 @@ simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf32_to_utf8( std::span utf32_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf8::convert( + utf32_input.data(), utf32_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN @@ -3366,13 +7775,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf8( simdutf_warn_unused result convert_utf32_to_utf8_with_errors( const char32_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf32_to_utf8_with_errors( std::span utf32_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf32_to_utf8_with_errors( - utf32_input.data(), utf32_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf8::convert_with_errors( + utf32_input.data(), utf32_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf32_to_utf8_with_errors( + utf32_input.data(), utf32_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN @@ -3392,12 +7809,21 @@ convert_utf32_to_utf8_with_errors( simdutf_warn_unused size_t convert_valid_utf32_to_utf8( const char32_t *input, size_t length, char *utf8_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf32_to_utf8( std::span valid_utf32_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_valid_utf32_to_utf8( - valid_utf32_input.data(), valid_utf32_input.size(), - reinterpret_cast(utf8_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf8::convert_valid( + valid_utf32_input.data(), valid_utf32_input.size(), utf8_output.data()); + } else + #endif + { + return convert_valid_utf32_to_utf8( + valid_utf32_input.data(), valid_utf32_input.size(), + reinterpret_cast(utf8_output.data())); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 @@ -3420,11 +7846,19 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf8( simdutf_warn_unused size_t convert_utf32_to_utf16( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_utf32_to_utf16(std::span utf32_input, std::span utf16_output) noexcept { - return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(), - utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3444,11 +7878,19 @@ convert_utf32_to_utf16(std::span utf32_input, simdutf_warn_unused size_t convert_utf32_to_utf16le( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_utf32_to_utf16le(std::span utf32_input, std::span utf16_output) noexcept { - return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(), - utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 @@ -3471,12 +7913,21 @@ convert_utf32_to_utf16le(std::span utf32_input, simdutf_warn_unused size_t convert_utf32_to_latin1( const char32_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_latin1( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf32_to_latin1( std::span utf32_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf32_to_latin1( - utf32_input.data(), utf32_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_latin1::convert( + utf32_input.data(), utf32_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf32_to_latin1( + utf32_input.data(), utf32_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN @@ -3500,13 +7951,21 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_latin1( simdutf_warn_unused result convert_utf32_to_latin1_with_errors( const char32_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf32_to_latin1_with_errors( std::span utf32_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf32_to_latin1_with_errors( - utf32_input.data(), utf32_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_latin1::convert_with_errors( + utf32_input.data(), utf32_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf32_to_latin1_with_errors( + utf32_input.data(), utf32_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN @@ -3533,12 +7992,23 @@ convert_utf32_to_latin1_with_errors( simdutf_warn_unused size_t convert_valid_utf32_to_latin1( const char32_t *input, size_t length, char *latin1_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_latin1( +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t +convert_valid_utf32_to_latin1( std::span valid_utf32_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_valid_utf32_to_latin1( - valid_utf32_input.data(), valid_utf32_input.size(), - reinterpret_cast(latin1_output.data())); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_latin1::convert_valid( + detail::constexpr_cast_ptr(valid_utf32_input.data()), + valid_utf32_input.size(), + detail::constexpr_cast_writeptr(latin1_output.data())); + } + #endif + { + return convert_valid_utf32_to_latin1( + valid_utf32_input.data(), valid_utf32_input.size(), + reinterpret_cast(latin1_output.data())); + } } #endif // SIMDUTF_SPAN @@ -3554,7 +8024,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_latin1( * @param length the length of the string in 4-byte code units (char32_t) * @return the number of bytes required to encode the UTF-32 string as Latin1 */ -simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept; +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 size_t +latin1_length_from_utf32(size_t length) noexcept { + return length; +} /** * Compute the number of bytes that this Latin1 string would require in UTF-32 @@ -3564,7 +8037,10 @@ simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept; * @return the length of the string in 4-byte code units (char32_t) required to * encode the Latin1 string as UTF-32 */ -simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept; +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 size_t +utf32_length_from_latin1(size_t length) noexcept { + return length; +} #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 @@ -3584,11 +8060,19 @@ simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept; simdutf_warn_unused size_t convert_utf32_to_utf16be( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_utf32_to_utf16be(std::span utf32_input, std::span utf16_output) noexcept { - return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(), - utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3612,11 +8096,19 @@ convert_utf32_to_utf16be(std::span utf32_input, simdutf_warn_unused result convert_utf32_to_utf16_with_errors( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf32_to_utf16_with_errors(std::span utf32_input, std::span utf16_output) noexcept { - return convert_utf32_to_utf16_with_errors( - utf32_input.data(), utf32_input.size(), utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3639,12 +8131,20 @@ convert_utf32_to_utf16_with_errors(std::span utf32_input, simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf32_to_utf16le_with_errors( std::span utf32_input, std::span utf16_output) noexcept { - return convert_utf32_to_utf16le_with_errors( - utf32_input.data(), utf32_input.size(), utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16le_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3667,12 +8167,20 @@ convert_utf32_to_utf16le_with_errors( simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result convert_utf32_to_utf16be_with_errors( std::span utf32_input, std::span utf16_output) noexcept { - return convert_utf32_to_utf16be_with_errors( - utf32_input.data(), utf32_input.size(), utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16be_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3692,11 +8200,22 @@ convert_utf32_to_utf16be_with_errors( simdutf_warn_unused size_t convert_valid_utf32_to_utf16( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_valid_utf32_to_utf16(std::span valid_utf32_input, std::span utf16_output) noexcept { - return convert_valid_utf32_to_utf16( - valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); + + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_valid( + valid_utf32_input.data(), valid_utf32_input.size(), + utf16_output.data()); + } else + #endif + { + return convert_valid_utf32_to_utf16(valid_utf32_input.data(), + valid_utf32_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3716,11 +8235,21 @@ convert_valid_utf32_to_utf16(std::span valid_utf32_input, simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_valid_utf32_to_utf16le(std::span valid_utf32_input, std::span utf16_output) noexcept { - return convert_valid_utf32_to_utf16le( - valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_valid( + valid_utf32_input.data(), valid_utf32_input.size(), + utf16_output.data()); + } else + #endif + { + return convert_valid_utf32_to_utf16le(valid_utf32_input.data(), + valid_utf32_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN @@ -3740,11 +8269,21 @@ convert_valid_utf32_to_utf16le(std::span valid_utf32_input, simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t convert_valid_utf32_to_utf16be(std::span valid_utf32_input, std::span utf16_output) noexcept { - return convert_valid_utf32_to_utf16be( - valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_valid( + valid_utf32_input.data(), valid_utf32_input.size(), + utf16_output.data()); + } else + #endif + { + return convert_valid_utf32_to_utf16be(valid_utf32_input.data(), + valid_utf32_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 @@ -3766,11 +8305,19 @@ convert_valid_utf32_to_utf16be(std::span valid_utf32_input, void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline void +simdutf_really_inline simdutf_constexpr23 void change_endianness_utf16(std::span utf16_input, std::span utf16_output) noexcept { - return change_endianness_utf16(utf16_input.data(), utf16_input.size(), - utf16_output.data()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::change_endianness_utf16( + utf16_input.data(), utf16_input.size(), utf16_output.data()); + } else + #endif + { + return change_endianness_utf16(utf16_input.data(), utf16_input.size(), + utf16_output.data()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 @@ -3790,10 +8337,18 @@ change_endianness_utf16(std::span utf16_input, simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t utf8_length_from_utf32(std::span valid_utf32_input) noexcept { - return utf8_length_from_utf32(valid_utf32_input.data(), - valid_utf32_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32::utf8_length_from_utf32(valid_utf32_input.data(), + valid_utf32_input.size()); + } else + #endif + { + return utf8_length_from_utf32(valid_utf32_input.data(), + valid_utf32_input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 @@ -3813,10 +8368,18 @@ utf8_length_from_utf32(std::span valid_utf32_input) noexcept { simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t utf16_length_from_utf32(std::span valid_utf32_input) noexcept { - return utf16_length_from_utf32(valid_utf32_input.data(), - valid_utf32_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32::utf16_length_from_utf32(valid_utf32_input.data(), + valid_utf32_input.size()); + } else + #endif + { + return utf16_length_from_utf32(valid_utf32_input.data(), + valid_utf32_input.size()); + } } #endif // SIMDUTF_SPAN @@ -3838,10 +8401,18 @@ utf16_length_from_utf32(std::span valid_utf32_input) noexcept { simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t utf32_length_from_utf16(std::span valid_utf16_input) noexcept { - return utf32_length_from_utf16(valid_utf16_input.data(), - valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf32_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf32_length_from_utf16(valid_utf16_input.data(), + valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN @@ -3863,10 +8434,19 @@ utf32_length_from_utf16(std::span valid_utf16_input) noexcept { simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16le( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf32_length_from_utf16le( std::span valid_utf16_input) noexcept { - return utf32_length_from_utf16le(valid_utf16_input.data(), - valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf32_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf32_length_from_utf16le(valid_utf16_input.data(), + valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN @@ -3888,10 +8468,19 @@ simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16le( simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16be( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf32_length_from_utf16be( std::span valid_utf16_input) noexcept { - return utf32_length_from_utf16be(valid_utf16_input.data(), - valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf32_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf32_length_from_utf16be(valid_utf16_input.data(), + valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 @@ -3914,9 +8503,17 @@ simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16be( simdutf_warn_unused size_t count_utf16(const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t count_utf16(std::span valid_utf16_input) noexcept { - return count_utf16(valid_utf16_input.data(), valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::count_code_points( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return count_utf16(valid_utf16_input.data(), valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN @@ -3937,9 +8534,17 @@ count_utf16(std::span valid_utf16_input) noexcept { simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t count_utf16le(std::span valid_utf16_input) noexcept { - return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::count_code_points( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN @@ -3960,9 +8565,17 @@ count_utf16le(std::span valid_utf16_input) noexcept { simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t count_utf16be(std::span valid_utf16_input) noexcept { - return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::count_code_points( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF16 @@ -3983,10 +8596,18 @@ count_utf16be(std::span valid_utf16_input) noexcept { simdutf_warn_unused size_t count_utf8(const char *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t count_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t count_utf8( const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { - return count_utf8(reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::count_code_points(valid_utf8_input.data(), + valid_utf8_input.size()); + } else + #endif + { + return count_utf8(reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); + } } #endif // SIMDUTF_SPAN @@ -4006,119 +8627,2001 @@ simdutf_really_inline simdutf_warn_unused size_t count_utf8( */ simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length); #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf8( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf8( const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { - return trim_partial_utf8( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::trim_partial_utf8(valid_utf8_input.data(), + valid_utf8_input.size()); + } else + #endif + { + return trim_partial_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); + } } #endif // SIMDUTF_SPAN #endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF16 -/** - * Given a valid UTF-16BE string having a possibly truncated last character, - * this function checks the end of string. If the last character is truncated - * (or partial), then it returns a shorter length (shorter by 1 unit) so that - * the short UTF-16BE strings only contain complete characters. If there is no - * truncated character, the original length is returned. - * - * This function assumes that the input string is valid UTF-16BE, but possibly - * truncated. - * - * @param input the UTF-16BE string to process - * @param length the length of the string in bytes - * @return the length of the string in bytes, possibly shorter by 1 unit - */ -simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, - size_t length); - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -trim_partial_utf16be(std::span valid_utf16_input) noexcept { - return trim_partial_utf16be(valid_utf16_input.data(), - valid_utf16_input.size()); +#if SIMDUTF_FEATURE_UTF16 +/** + * Given a valid UTF-16BE string having a possibly truncated last character, + * this function checks the end of string. If the last character is truncated + * (or partial), then it returns a shorter length (shorter by 1 unit) so that + * the short UTF-16BE strings only contain complete characters. If there is no + * truncated character, the original length is returned. + * + * This function assumes that the input string is valid UTF-16BE, but possibly + * truncated. + * + * @param input the UTF-16BE string to process + * @param length the length of the string in bytes + * @return the length of the string in bytes, possibly shorter by 1 unit + */ +simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, + size_t length); + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf16be(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::trim_partial_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return trim_partial_utf16be(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Given a valid UTF-16LE string having a possibly truncated last character, + * this function checks the end of string. If the last character is truncated + * (or partial), then it returns a shorter length (shorter by 1 unit) so that + * the short UTF-16LE strings only contain complete characters. If there is no + * truncated character, the original length is returned. + * + * This function assumes that the input string is valid UTF-16LE, but possibly + * truncated. + * + * @param input the UTF-16LE string to process + * @param length the length of the string in bytes + * @return the length of the string in unit, possibly shorter by 1 unit + */ +simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, + size_t length); + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf16le(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::trim_partial_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return trim_partial_utf16le(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Given a valid UTF-16 string having a possibly truncated last character, + * this function checks the end of string. If the last character is truncated + * (or partial), then it returns a shorter length (shorter by 1 unit) so that + * the short UTF-16 strings only contain complete characters. If there is no + * truncated character, the original length is returned. + * + * This function assumes that the input string is valid UTF-16, but possibly + * truncated. We use the native endianness. + * + * @param input the UTF-16 string to process + * @param length the length of the string in bytes + * @return the length of the string in unit, possibly shorter by 1 unit + */ +simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, + size_t length); + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf16(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::trim_partial_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return trim_partial_utf16(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 || \ + SIMDUTF_FEATURE_DETECT_ENCODING + #ifndef SIMDUTF_NEED_TRAILING_ZEROES + #define SIMDUTF_NEED_TRAILING_ZEROES 1 + #endif +#endif // SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_BASE64 +// base64_options are used to specify the base64 encoding options. +// ASCII spaces are ' ', '\t', '\n', '\r', '\f' +// garbage characters are characters that are not part of the base64 alphabet +// nor ASCII spaces. +constexpr uint64_t base64_reverse_padding = + 2; /* modifier for base64_default and base64_url */ +enum base64_options : uint64_t { + base64_default = 0, /* standard base64 format (with padding) */ + base64_url = 1, /* base64url format (no padding) */ + base64_default_no_padding = + base64_default | + base64_reverse_padding, /* standard base64 format without padding */ + base64_url_with_padding = + base64_url | base64_reverse_padding, /* base64url with padding */ + base64_default_accept_garbage = + 4, /* standard base64 format accepting garbage characters, the input stops + with the first '=' if any */ + base64_url_accept_garbage = + 5, /* base64url format accepting garbage characters, the input stops with + the first '=' if any */ + base64_default_or_url = + 8, /* standard/base64url hybrid format (only meaningful for decoding!) */ + base64_default_or_url_accept_garbage = + 12, /* standard/base64url hybrid format accepting garbage characters + (only meaningful for decoding!), the input stops with the first '=' + if any */ +}; + +// last_chunk_handling_options are used to specify the handling of the last +// chunk in base64 decoding. +// https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 +enum last_chunk_handling_options : uint64_t { + loose = 0, /* standard base64 format, decode partial final chunk */ + strict = 1, /* error when the last chunk is partial, 2 or 3 chars, and + unpadded, or non-zero bit padding */ + stop_before_partial = + 2, /* if the last chunk is partial, ignore it (no error) */ + only_full_chunks = + 3 /* only decode full blocks (4 base64 characters, no padding) */ +}; + +inline simdutf_constexpr23 bool +is_partial(last_chunk_handling_options options) { + return (options == stop_before_partial) || (options == only_full_chunks); +} + +namespace detail { +simdutf_warn_unused const char *find(const char *start, const char *end, + char character) noexcept; +simdutf_warn_unused const char16_t * +find(const char16_t *start, const char16_t *end, char16_t character) noexcept; +} // namespace detail + +/** + * Find the first occurrence of a character in a string. If the character is + * not found, return a pointer to the end of the string. + * @param start the start of the string + * @param end the end of the string + * @param character the character to find + * @return a pointer to the first occurrence of the character in the string, + * or a pointer to the end of the string if the character is not found. + * + */ +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 const char * +find(const char *start, const char *end, char character) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + for (; start != end; ++start) + if (*start == character) + return start; + return end; + } else + #endif + { + return detail::find(start, end, character); + } +} +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 const char16_t * +find(const char16_t *start, const char16_t *end, char16_t character) noexcept { + // implementation note: this is repeated instead of a template, to ensure + // the api is still a function and compiles without concepts + #if SIMDUTF_CPLUSPLUS23 + if consteval { + for (; start != end; ++start) + if (*start == character) + return start; + return end; + } else + #endif + { + return detail::find(start, end, character); + } +} +} + // We include base64_tables once. +/* begin file include/simdutf/base64_tables.h */ +#ifndef SIMDUTF_BASE64_TABLES_H +#define SIMDUTF_BASE64_TABLES_H +#include + +namespace simdutf { +namespace { +namespace tables { +namespace base64 { +namespace base64_default { + +constexpr char e0[256] = { + 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'D', 'D', 'D', + 'D', 'E', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'H', 'H', + 'H', 'H', 'I', 'I', 'I', 'I', 'J', 'J', 'J', 'J', 'K', 'K', 'K', 'K', 'L', + 'L', 'L', 'L', 'M', 'M', 'M', 'M', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O', + 'P', 'P', 'P', 'P', 'Q', 'Q', 'Q', 'Q', 'R', 'R', 'R', 'R', 'S', 'S', 'S', + 'S', 'T', 'T', 'T', 'T', 'U', 'U', 'U', 'U', 'V', 'V', 'V', 'V', 'W', 'W', + 'W', 'W', 'X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y', 'Z', 'Z', 'Z', 'Z', 'a', + 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', + 'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g', 'g', 'g', 'h', 'h', 'h', + 'h', 'i', 'i', 'i', 'i', 'j', 'j', 'j', 'j', 'k', 'k', 'k', 'k', 'l', 'l', + 'l', 'l', 'm', 'm', 'm', 'm', 'n', 'n', 'n', 'n', 'o', 'o', 'o', 'o', 'p', + 'p', 'p', 'p', 'q', 'q', 'q', 'q', 'r', 'r', 'r', 'r', 's', 's', 's', 's', + 't', 't', 't', 't', 'u', 'u', 'u', 'u', 'v', 'v', 'v', 'v', 'w', 'w', 'w', + 'w', 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y', 'z', 'z', 'z', 'z', '0', '0', + '0', '0', '1', '1', '1', '1', '2', '2', '2', '2', '3', '3', '3', '3', '4', + '4', '4', '4', '5', '5', '5', '5', '6', '6', '6', '6', '7', '7', '7', '7', + '8', '8', '8', '8', '9', '9', '9', '9', '+', '+', '+', '+', '/', '/', '/', + '/'}; + +constexpr char e1[256] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', + 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', + 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', + 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', + 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', + '/'}; + +constexpr char e2[256] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', + 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', + 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', + 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', + 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', + '/'}; + +constexpr uint32_t d0[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x000000f8, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, + 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, + 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, + 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, + 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, + 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, + 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, + 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, + 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, + 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, + 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; + +constexpr uint32_t d1[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x0000e003, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, + 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, + 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, + 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, + 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, + 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, + 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, + 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, + 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, + 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, + 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; + +constexpr uint32_t d2[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x00800f00, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, + 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, + 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, + 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, + 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, + 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, + 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, + 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, + 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, + 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, + 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; + +constexpr uint32_t d3[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x003e0000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, + 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, + 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, + 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, + 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, + 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, + 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, + 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, + 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, + 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, + 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +} // namespace base64_default + +namespace base64_url { + +constexpr char e0[256] = { + 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'D', 'D', 'D', + 'D', 'E', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'H', 'H', + 'H', 'H', 'I', 'I', 'I', 'I', 'J', 'J', 'J', 'J', 'K', 'K', 'K', 'K', 'L', + 'L', 'L', 'L', 'M', 'M', 'M', 'M', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O', + 'P', 'P', 'P', 'P', 'Q', 'Q', 'Q', 'Q', 'R', 'R', 'R', 'R', 'S', 'S', 'S', + 'S', 'T', 'T', 'T', 'T', 'U', 'U', 'U', 'U', 'V', 'V', 'V', 'V', 'W', 'W', + 'W', 'W', 'X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y', 'Z', 'Z', 'Z', 'Z', 'a', + 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', + 'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g', 'g', 'g', 'h', 'h', 'h', + 'h', 'i', 'i', 'i', 'i', 'j', 'j', 'j', 'j', 'k', 'k', 'k', 'k', 'l', 'l', + 'l', 'l', 'm', 'm', 'm', 'm', 'n', 'n', 'n', 'n', 'o', 'o', 'o', 'o', 'p', + 'p', 'p', 'p', 'q', 'q', 'q', 'q', 'r', 'r', 'r', 'r', 's', 's', 's', 's', + 't', 't', 't', 't', 'u', 'u', 'u', 'u', 'v', 'v', 'v', 'v', 'w', 'w', 'w', + 'w', 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y', 'z', 'z', 'z', 'z', '0', '0', + '0', '0', '1', '1', '1', '1', '2', '2', '2', '2', '3', '3', '3', '3', '4', + '4', '4', '4', '5', '5', '5', '5', '6', '6', '6', '6', '7', '7', '7', '7', + '8', '8', '8', '8', '9', '9', '9', '9', '-', '-', '-', '-', '_', '_', '_', + '_'}; + +constexpr char e1[256] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', + 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', + 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', + 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', + 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', + '_'}; + +constexpr char e2[256] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', + 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', + 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', + 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', + 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', + '_'}; + +constexpr uint32_t d0[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000f8, 0x01ffffff, 0x01ffffff, + 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, + 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, + 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, + 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, + 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, + 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, + 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, + 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, + 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, + 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, + 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d1[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000e003, 0x01ffffff, 0x01ffffff, + 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, + 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, + 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, + 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, + 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, + 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, + 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, + 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, + 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, + 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, + 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d2[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00800f00, 0x01ffffff, 0x01ffffff, + 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, + 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, + 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, + 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, + 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, + 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, + 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, + 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, + 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, + 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, + 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d3[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003e0000, 0x01ffffff, 0x01ffffff, + 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, + 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, + 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, + 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, + 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, + 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, + 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, + 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, + 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, + 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, + 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +} // namespace base64_url + +namespace base64_default_or_url { +constexpr uint32_t d0[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x000000f8, 0x01ffffff, 0x000000f8, 0x01ffffff, 0x000000fc, + 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, + 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, + 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, + 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, + 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, + 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, + 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, + 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, + 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, + 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, + 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d1[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x0000e003, 0x01ffffff, 0x0000e003, 0x01ffffff, 0x0000f003, + 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, + 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, + 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, + 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, + 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, + 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, + 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, + 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, + 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, + 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, + 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d2[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x00800f00, 0x01ffffff, 0x00800f00, 0x01ffffff, 0x00c00f00, + 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, + 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, + 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, + 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, + 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, + 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, + 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, + 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, + 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, + 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, + 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d3[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x003e0000, 0x01ffffff, 0x003e0000, 0x01ffffff, 0x003f0000, + 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, + 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, + 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, + 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, + 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, + 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, + 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, + 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, + 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, + 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, + 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +} // namespace base64_default_or_url +constexpr uint64_t thintable_epi8[256] = { + 0x0706050403020100, 0x0007060504030201, 0x0007060504030200, + 0x0000070605040302, 0x0007060504030100, 0x0000070605040301, + 0x0000070605040300, 0x0000000706050403, 0x0007060504020100, + 0x0000070605040201, 0x0000070605040200, 0x0000000706050402, + 0x0000070605040100, 0x0000000706050401, 0x0000000706050400, + 0x0000000007060504, 0x0007060503020100, 0x0000070605030201, + 0x0000070605030200, 0x0000000706050302, 0x0000070605030100, + 0x0000000706050301, 0x0000000706050300, 0x0000000007060503, + 0x0000070605020100, 0x0000000706050201, 0x0000000706050200, + 0x0000000007060502, 0x0000000706050100, 0x0000000007060501, + 0x0000000007060500, 0x0000000000070605, 0x0007060403020100, + 0x0000070604030201, 0x0000070604030200, 0x0000000706040302, + 0x0000070604030100, 0x0000000706040301, 0x0000000706040300, + 0x0000000007060403, 0x0000070604020100, 0x0000000706040201, + 0x0000000706040200, 0x0000000007060402, 0x0000000706040100, + 0x0000000007060401, 0x0000000007060400, 0x0000000000070604, + 0x0000070603020100, 0x0000000706030201, 0x0000000706030200, + 0x0000000007060302, 0x0000000706030100, 0x0000000007060301, + 0x0000000007060300, 0x0000000000070603, 0x0000000706020100, + 0x0000000007060201, 0x0000000007060200, 0x0000000000070602, + 0x0000000007060100, 0x0000000000070601, 0x0000000000070600, + 0x0000000000000706, 0x0007050403020100, 0x0000070504030201, + 0x0000070504030200, 0x0000000705040302, 0x0000070504030100, + 0x0000000705040301, 0x0000000705040300, 0x0000000007050403, + 0x0000070504020100, 0x0000000705040201, 0x0000000705040200, + 0x0000000007050402, 0x0000000705040100, 0x0000000007050401, + 0x0000000007050400, 0x0000000000070504, 0x0000070503020100, + 0x0000000705030201, 0x0000000705030200, 0x0000000007050302, + 0x0000000705030100, 0x0000000007050301, 0x0000000007050300, + 0x0000000000070503, 0x0000000705020100, 0x0000000007050201, + 0x0000000007050200, 0x0000000000070502, 0x0000000007050100, + 0x0000000000070501, 0x0000000000070500, 0x0000000000000705, + 0x0000070403020100, 0x0000000704030201, 0x0000000704030200, + 0x0000000007040302, 0x0000000704030100, 0x0000000007040301, + 0x0000000007040300, 0x0000000000070403, 0x0000000704020100, + 0x0000000007040201, 0x0000000007040200, 0x0000000000070402, + 0x0000000007040100, 0x0000000000070401, 0x0000000000070400, + 0x0000000000000704, 0x0000000703020100, 0x0000000007030201, + 0x0000000007030200, 0x0000000000070302, 0x0000000007030100, + 0x0000000000070301, 0x0000000000070300, 0x0000000000000703, + 0x0000000007020100, 0x0000000000070201, 0x0000000000070200, + 0x0000000000000702, 0x0000000000070100, 0x0000000000000701, + 0x0000000000000700, 0x0000000000000007, 0x0006050403020100, + 0x0000060504030201, 0x0000060504030200, 0x0000000605040302, + 0x0000060504030100, 0x0000000605040301, 0x0000000605040300, + 0x0000000006050403, 0x0000060504020100, 0x0000000605040201, + 0x0000000605040200, 0x0000000006050402, 0x0000000605040100, + 0x0000000006050401, 0x0000000006050400, 0x0000000000060504, + 0x0000060503020100, 0x0000000605030201, 0x0000000605030200, + 0x0000000006050302, 0x0000000605030100, 0x0000000006050301, + 0x0000000006050300, 0x0000000000060503, 0x0000000605020100, + 0x0000000006050201, 0x0000000006050200, 0x0000000000060502, + 0x0000000006050100, 0x0000000000060501, 0x0000000000060500, + 0x0000000000000605, 0x0000060403020100, 0x0000000604030201, + 0x0000000604030200, 0x0000000006040302, 0x0000000604030100, + 0x0000000006040301, 0x0000000006040300, 0x0000000000060403, + 0x0000000604020100, 0x0000000006040201, 0x0000000006040200, + 0x0000000000060402, 0x0000000006040100, 0x0000000000060401, + 0x0000000000060400, 0x0000000000000604, 0x0000000603020100, + 0x0000000006030201, 0x0000000006030200, 0x0000000000060302, + 0x0000000006030100, 0x0000000000060301, 0x0000000000060300, + 0x0000000000000603, 0x0000000006020100, 0x0000000000060201, + 0x0000000000060200, 0x0000000000000602, 0x0000000000060100, + 0x0000000000000601, 0x0000000000000600, 0x0000000000000006, + 0x0000050403020100, 0x0000000504030201, 0x0000000504030200, + 0x0000000005040302, 0x0000000504030100, 0x0000000005040301, + 0x0000000005040300, 0x0000000000050403, 0x0000000504020100, + 0x0000000005040201, 0x0000000005040200, 0x0000000000050402, + 0x0000000005040100, 0x0000000000050401, 0x0000000000050400, + 0x0000000000000504, 0x0000000503020100, 0x0000000005030201, + 0x0000000005030200, 0x0000000000050302, 0x0000000005030100, + 0x0000000000050301, 0x0000000000050300, 0x0000000000000503, + 0x0000000005020100, 0x0000000000050201, 0x0000000000050200, + 0x0000000000000502, 0x0000000000050100, 0x0000000000000501, + 0x0000000000000500, 0x0000000000000005, 0x0000000403020100, + 0x0000000004030201, 0x0000000004030200, 0x0000000000040302, + 0x0000000004030100, 0x0000000000040301, 0x0000000000040300, + 0x0000000000000403, 0x0000000004020100, 0x0000000000040201, + 0x0000000000040200, 0x0000000000000402, 0x0000000000040100, + 0x0000000000000401, 0x0000000000000400, 0x0000000000000004, + 0x0000000003020100, 0x0000000000030201, 0x0000000000030200, + 0x0000000000000302, 0x0000000000030100, 0x0000000000000301, + 0x0000000000000300, 0x0000000000000003, 0x0000000000020100, + 0x0000000000000201, 0x0000000000000200, 0x0000000000000002, + 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, + 0x0000000000000000, +}; + +constexpr uint8_t pshufb_combine_table[272] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +}; + +constexpr unsigned char BitsSetTable256mul2[256] = { + 0, 2, 2, 4, 2, 4, 4, 6, 2, 4, 4, 6, 4, 6, 6, 8, 2, 4, 4, + 6, 4, 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 2, 4, 4, 6, 4, 6, + 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, + 8, 8, 10, 8, 10, 10, 12, 2, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, + 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, + 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, 8, + 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 2, 4, 4, 6, 4, + 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, + 6, 8, 8, 10, 8, 10, 10, 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, + 10, 8, 10, 10, 12, 6, 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, + 12, 14, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, + 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 6, 8, 8, 10, + 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 8, 10, 10, 12, 10, 12, 12, + 14, 10, 12, 12, 14, 12, 14, 14, 16}; + +constexpr uint8_t to_base64_value[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, + 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, + 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + +constexpr uint8_t to_base64_url_value[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 62, 255, 255, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, + 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 255, 255, 255, 255, 63, 255, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + +constexpr uint8_t to_base64_default_or_url_value[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, + 62, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, + 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 255, 255, 255, 255, 63, 255, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + +static_assert(sizeof(to_base64_value) == 256, + "to_base64_value must have 256 elements"); +static_assert(sizeof(to_base64_url_value) == 256, + "to_base64_url_value must have 256 elements"); +static_assert(to_base64_value[uint8_t(' ')] == 64, + "space must be == 64 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t(' ')] == 64, + "space must be == 64 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('\t')] == 64, + "tab must be == 64 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('\t')] == 64, + "tab must be == 64 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('\r')] == 64, + "cr must be == 64 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('\r')] == 64, + "cr must be == 64 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('\n')] == 64, + "lf must be == 64 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('\n')] == 64, + "lf must be == 64 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('\f')] == 64, + "ff must be == 64 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('\f')] == 64, + "ff must be == 64 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('+')] == 62, + "+ must be == 62 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('-')] == 62, + "- must be == 62 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('/')] == 63, + "/ must be == 63 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('_')] == 63, + "_ must be == 63 in to_base64_url_value"); +} // namespace base64 +} // namespace tables +} // unnamed namespace +} // namespace simdutf + +#endif // SIMDUTF_BASE64_TABLES_H +/* end file include/simdutf/base64_tables.h */ +/* begin file include/simdutf/scalar/base64.h */ +#ifndef SIMDUTF_BASE64_H +#define SIMDUTF_BASE64_H + +#include +#include +#include +#include +#include + +namespace simdutf { +namespace scalar { +namespace { +namespace base64 { + +// This function is not expected to be fast. Do not use in long loops. +// In most instances you should be using is_ignorable. +template bool is_ascii_white_space(char_type c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; +} + +template simdutf_constexpr23 bool is_eight_byte(char_type c) { + if simdutf_constexpr (sizeof(char_type) == 1) { + return true; + } + return uint8_t(c) == c; +} + +template +simdutf_constexpr23 bool is_ignorable(char_type c, + simdutf::base64_options options) { + const uint8_t *to_base64 = + (options & base64_default_or_url) + ? tables::base64::to_base64_default_or_url_value + : ((options & base64_url) ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage) || + (options == base64_options::base64_default_or_url_accept_garbage); + uint8_t code = to_base64[uint8_t(c)]; + if (is_eight_byte(c) && code <= 63) { + return false; + } + if (is_eight_byte(c) && code == 64) { + return true; + } + return ignore_garbage; +} +template +simdutf_constexpr23 bool is_base64(char_type c, + simdutf::base64_options options) { + const uint8_t *to_base64 = + (options & base64_default_or_url) + ? tables::base64::to_base64_default_or_url_value + : ((options & base64_url) ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + uint8_t code = to_base64[uint8_t(c)]; + if (is_eight_byte(c) && code <= 63) { + return true; + } + return false; +} + +template +simdutf_constexpr23 bool is_base64_or_padding(char_type c, + simdutf::base64_options options) { + const uint8_t *to_base64 = + (options & base64_default_or_url) + ? tables::base64::to_base64_default_or_url_value + : ((options & base64_url) ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + if (c == '=') { + return true; + } + uint8_t code = to_base64[uint8_t(c)]; + if (is_eight_byte(c) && code <= 63) { + return true; + } + return false; +} + +template +bool is_ignorable_or_padding(char_type c, simdutf::base64_options options) { + return is_ignorable(c, options) || c == '='; +} + +struct reduced_input { + size_t equalsigns; // number of padding characters '=', typically 0, 1, 2. + size_t equallocation; // location of the first padding character if any + size_t srclen; // length of the input buffer before padding + size_t full_input_length; // length of the input buffer with padding but + // without ignorable characters +}; + +// find the end of the base64 input buffer +// It returns the number of padding characters, the location of the first +// padding character if any, the length of the input buffer before padding +// and the length of the input buffer with padding. The input buffer is not +// modified. The function assumes that there are at most two padding characters. +template +simdutf_constexpr23 reduced_input find_end(const char_type *src, size_t srclen, + simdutf::base64_options options) { + const uint8_t *to_base64 = + (options & base64_default_or_url) + ? tables::base64::to_base64_default_or_url_value + : ((options & base64_url) ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage) || + (options == base64_options::base64_default_or_url_accept_garbage); + + size_t equalsigns = 0; + // We intentionally include trailing spaces in the full input length. + // See https://github.com/simdutf/simdutf/issues/824 + size_t full_input_length = srclen; + // skip trailing spaces + while (!ignore_garbage && srclen > 0 && + scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + size_t equallocation = + srclen; // location of the first padding character if any + if (ignore_garbage) { + // Technically, we don't need to find the first padding character, we can + // just change our algorithms, but it adds substantial complexity. + auto it = simdutf::find(src, src + srclen, '='); + if (it != src + srclen) { + equallocation = it - src; + equalsigns = 1; + srclen = equallocation; + full_input_length = equallocation + 1; + } + return {equalsigns, equallocation, srclen, full_input_length}; + } + if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { + // This is the last '=' sign. + equallocation = srclen - 1; + srclen--; + equalsigns = 1; + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + if (srclen > 0 && src[srclen - 1] == '=') { + // This is the second '=' sign. + equallocation = srclen - 1; + srclen--; + equalsigns = 2; + } + } + return {equalsigns, equallocation, srclen, full_input_length}; +} + +// Returns true upon success. The destination buffer must be large enough. +// This functions assumes that the padding (=) has been removed. +// if check_capacity is true, it will check that the destination buffer is +// large enough. If it is not, it will return OUTPUT_BUFFER_TOO_SMALL. +template +simdutf_constexpr23 full_result base64_tail_decode_impl( + char *dst, size_t outlen, const char_type *src, size_t length, + size_t padding_characters, // number of padding characters + // '=', typically 0, 1, 2. + base64_options options, last_chunk_handling_options last_chunk_options) { + char *dstend = dst + outlen; + (void)dstend; + // This looks like 10 branches, but we expect the compiler to resolve this to + // two branches (easily predicted): + const uint8_t *to_base64 = + (options & base64_default_or_url) + ? tables::base64::to_base64_default_or_url_value + : ((options & base64_url) ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + const uint32_t *d0 = + (options & base64_default_or_url) + ? tables::base64::base64_default_or_url::d0 + : ((options & base64_url) ? tables::base64::base64_url::d0 + : tables::base64::base64_default::d0); + const uint32_t *d1 = + (options & base64_default_or_url) + ? tables::base64::base64_default_or_url::d1 + : ((options & base64_url) ? tables::base64::base64_url::d1 + : tables::base64::base64_default::d1); + const uint32_t *d2 = + (options & base64_default_or_url) + ? tables::base64::base64_default_or_url::d2 + : ((options & base64_url) ? tables::base64::base64_url::d2 + : tables::base64::base64_default::d2); + const uint32_t *d3 = + (options & base64_default_or_url) + ? tables::base64::base64_default_or_url::d3 + : ((options & base64_url) ? tables::base64::base64_url::d3 + : tables::base64::base64_default::d3); + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage) || + (options == base64_options::base64_default_or_url_accept_garbage); + + const char_type *srcend = src + length; + const char_type *srcinit = src; + const char *dstinit = dst; + + uint32_t x; + size_t idx; + uint8_t buffer[4]; + while (true) { + while (srcend - src >= 4 && is_eight_byte(src[0]) && + is_eight_byte(src[1]) && is_eight_byte(src[2]) && + is_eight_byte(src[3]) && + (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | + d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { + if (check_capacity && dstend - dst < 3) { + return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + *dst++ = static_cast(x & 0xFF); + *dst++ = static_cast((x >> 8) & 0xFF); + *dst++ = static_cast((x >> 16) & 0xFF); + src += 4; + } + const char_type *srccur = src; + idx = 0; + // we need at least four characters. +#ifdef __clang__ + // If possible, we read four characters at a time. (It is an optimization.) + if (ignore_garbage && src + 4 <= srcend) { + char_type c0 = src[0]; + char_type c1 = src[1]; + char_type c2 = src[2]; + char_type c3 = src[3]; + + uint8_t code0 = to_base64[uint8_t(c0)]; + uint8_t code1 = to_base64[uint8_t(c1)]; + uint8_t code2 = to_base64[uint8_t(c2)]; + uint8_t code3 = to_base64[uint8_t(c3)]; + + buffer[idx] = code0; + idx += (is_eight_byte(c0) && code0 <= 63); + buffer[idx] = code1; + idx += (is_eight_byte(c1) && code1 <= 63); + buffer[idx] = code2; + idx += (is_eight_byte(c2) && code2 <= 63); + buffer[idx] = code3; + idx += (is_eight_byte(c3) && code3 <= 63); + src += 4; + } +#endif + while ((idx < 4) && (src < srcend)) { + char_type c = *src; + + uint8_t code = to_base64[uint8_t(c)]; + buffer[idx] = uint8_t(code); + if (is_eight_byte(c) && code <= 63) { + idx++; + } else if (!ignore_garbage && + (code > 64 || !scalar::base64::is_eight_byte(c))) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } else { + // We have a space or a newline or garbage. We ignore it. + } + src++; + } + if (idx != 4) { + simdutf_log_assert(idx < 4, "idx should be less than 4"); + // We never should have that the number of base64 characters + the + // number of padding characters is more than 4. + if (!ignore_garbage && (idx + padding_characters > 4)) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit), true}; + } + + // The idea here is that in loose mode, + // if there is padding at all, it must be used + // to form 4-wise chunk. However, in loose mode, + // we do accept no padding at all. + if (!ignore_garbage && + last_chunk_options == last_chunk_handling_options::loose && + (idx >= 2) && padding_characters > 0 && + ((idx + padding_characters) & 3) != 0) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit), true}; + } else + + // The idea here is that in strict mode, we do not want to accept + // incomplete base64 chunks. So if the chunk was otherwise valid, we + // return BASE64_INPUT_REMAINDER. + if (!ignore_garbage && + last_chunk_options == last_chunk_handling_options::strict && + (idx >= 2) && ((idx + padding_characters) & 3) != 0) { + // The partial chunk was at src - idx + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit), true}; + } else + // If there is a partial chunk with insufficient padding, with + // stop_before_partial, we need to just ignore it. In "only full" + // mode, skip the minute there are padding characters. + if ((last_chunk_options == + last_chunk_handling_options::stop_before_partial && + (padding_characters + idx < 4) && (idx != 0) && + (idx >= 2 || padding_characters == 0)) || + (last_chunk_options == + last_chunk_handling_options::only_full_chunks && + (idx >= 2 || padding_characters == 0))) { + // partial means that we are *not* going to consume the read + // characters. We need to rewind the src pointer. + src = srccur; + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; + } else { + if (idx == 2) { + uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + + (uint32_t(buffer[1]) << 2 * 6); + if (!ignore_garbage && + (last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xffff)) { + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + if (check_capacity && dstend - dst < 1) { + return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), + size_t(dst - dstinit)}; + } + *dst++ = static_cast((triple >> 16) & 0xFF); + } else if (idx == 3) { + uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + + (uint32_t(buffer[1]) << 2 * 6) + + (uint32_t(buffer[2]) << 1 * 6); + if (!ignore_garbage && + (last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xff)) { + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + if (check_capacity && dstend - dst < 2) { + return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), + size_t(dst - dstinit)}; + } + *dst++ = static_cast((triple >> 16) & 0xFF); + *dst++ = static_cast((triple >> 8) & 0xFF); + } else if (!ignore_garbage && idx == 1 && + (!is_partial(last_chunk_options) || + (is_partial(last_chunk_options) && + padding_characters > 0))) { + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } else if (!ignore_garbage && idx == 0 && padding_characters > 0) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit), true}; + } + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; + } + } + if (check_capacity && dstend - dst < 3) { + return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), + size_t(dst - dstinit)}; + } + uint32_t triple = + (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) + + (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) << 0 * 6); + *dst++ = static_cast((triple >> 16) & 0xFF); + *dst++ = static_cast((triple >> 8) & 0xFF); + *dst++ = static_cast(triple & 0xFF); + } +} + +template +simdutf_constexpr23 full_result base64_tail_decode( + char *dst, const char_type *src, size_t length, + size_t padding_characters, // number of padding characters + // '=', typically 0, 1, 2. + base64_options options, last_chunk_handling_options last_chunk_options) { + return base64_tail_decode_impl(dst, 0, src, length, padding_characters, + options, last_chunk_options); +} + +// like base64_tail_decode, but it will not write past the end of the output +// buffer. The outlen parameter is modified to reflect the number of bytes +// written. This functions assumes that the padding (=) has been removed. +// +template +simdutf_constexpr23 full_result base64_tail_decode_safe( + char *dst, size_t outlen, const char_type *src, size_t length, + size_t padding_characters, // number of padding characters + // '=', typically 0, 1, 2. + base64_options options, last_chunk_handling_options last_chunk_options) { + return base64_tail_decode_impl(dst, outlen, src, length, + padding_characters, options, + last_chunk_options); +} + +inline simdutf_constexpr23 full_result +patch_tail_result(full_result r, size_t previous_input, size_t previous_output, + size_t equallocation, size_t full_input_length, + last_chunk_handling_options last_chunk_options) { + r.input_count += previous_input; + r.output_count += previous_output; + if (r.padding_error) { + r.input_count = equallocation; + } + + if (r.error == error_code::SUCCESS) { + if (!is_partial(last_chunk_options)) { + // A success when we are not in stop_before_partial mode. + // means that we have consumed the whole input buffer. + r.input_count = full_input_length; + } else if (r.output_count % 3 != 0) { + r.input_count = full_input_length; + } + } + return r; +} + +// Returns the number of bytes written. The destination buffer must be large +// enough. It will add padding (=) if needed. +template +simdutf_constexpr23 size_t tail_encode_base64_impl( + char *dst, const char *src, size_t srclen, base64_options options, + size_t line_length = simdutf::default_line_length, size_t line_offset = 0) { + if simdutf_constexpr (use_lines) { + // sanitize line_length and starting_line_offset. + // line_length must be greater than 3. + if (line_length < 4) { + line_length = 4; + } + simdutf_log_assert(line_offset <= line_length, + "line_offset should be less than line_length"); + } + // By default, we use padding if we are not using the URL variant. + // This is check with ((options & base64_url) == 0) which returns true if we + // are not using the URL variant. However, we also allow 'inversion' of the + // convention with the base64_reverse_padding option. If the + // base64_reverse_padding option is set, we use padding if we are using the + // URL variant, and we omit it if we are not using the URL variant. This is + // checked with + // ((options & base64_reverse_padding) == base64_reverse_padding). + bool use_padding = + ((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding); + // This looks like 3 branches, but we expect the compiler to resolve this to + // a single branch: + const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 + : tables::base64::base64_default::e0; + const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 + : tables::base64::base64_default::e1; + const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2 + : tables::base64::base64_default::e2; + char *out = dst; + size_t i = 0; + uint8_t t1, t2, t3; + for (; i + 2 < srclen; i += 3) { + t1 = uint8_t(src[i]); + t2 = uint8_t(src[i + 1]); + t3 = uint8_t(src[i + 2]); + if simdutf_constexpr (use_lines) { + if (line_offset + 3 >= line_length) { + if (line_offset == line_length) { + *out++ = '\n'; + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + line_offset = 4; + } else if (line_offset + 1 == line_length) { + *out++ = e0[t1]; + *out++ = '\n'; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + line_offset = 3; + } else if (line_offset + 2 == line_length) { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = '\n'; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + line_offset = 2; + } else if (line_offset + 3 == line_length) { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = '\n'; + *out++ = e2[t3]; + line_offset = 1; + } + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + line_offset += 4; + } + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + } + } + switch (srclen - i) { + case 0: + break; + case 1: + t1 = uint8_t(src[i]); + if simdutf_constexpr (use_lines) { + if (use_padding) { + if (line_offset + 3 >= line_length) { + if (line_offset == line_length) { + *out++ = '\n'; + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + *out++ = '='; + *out++ = '='; + } else if (line_offset + 1 == line_length) { + *out++ = e0[t1]; + *out++ = '\n'; + *out++ = e1[(t1 & 0x03) << 4]; + *out++ = '='; + *out++ = '='; + } else if (line_offset + 2 == line_length) { + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + *out++ = '\n'; + *out++ = '='; + *out++ = '='; + } else if (line_offset + 3 == line_length) { + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + *out++ = '='; + *out++ = '\n'; + *out++ = '='; + } + } else { + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + *out++ = '='; + *out++ = '='; + } + } else { + if (line_offset + 2 >= line_length) { + if (line_offset == line_length) { + *out++ = '\n'; + *out++ = e0[uint8_t(src[i])]; + *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; + } else if (line_offset + 1 == line_length) { + *out++ = e0[uint8_t(src[i])]; + *out++ = '\n'; + *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; + } else { + *out++ = e0[uint8_t(src[i])]; + *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; + // *out++ = '\n'; ==> no newline at the end of the output + } + } else { + *out++ = e0[uint8_t(src[i])]; + *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; + } + } + } else { + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + if (use_padding) { + *out++ = '='; + *out++ = '='; + } + } + break; + default: /* case 2 */ + t1 = uint8_t(src[i]); + t2 = uint8_t(src[i + 1]); + if simdutf_constexpr (use_lines) { + if (use_padding) { + if (line_offset + 3 >= line_length) { + if (line_offset == line_length) { + *out++ = '\n'; + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + *out++ = '='; + } else if (line_offset + 1 == line_length) { + *out++ = e0[t1]; + *out++ = '\n'; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + *out++ = '='; + } else if (line_offset + 2 == line_length) { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = '\n'; + *out++ = e2[(t2 & 0x0F) << 2]; + *out++ = '='; + } else if (line_offset + 3 == line_length) { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + *out++ = '\n'; + *out++ = '='; + } + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + *out++ = '='; + } + } else { + if (line_offset + 3 >= line_length) { + if (line_offset == line_length) { + *out++ = '\n'; + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + } else if (line_offset + 1 == line_length) { + *out++ = e0[t1]; + *out++ = '\n'; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + } else if (line_offset + 2 == line_length) { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = '\n'; + *out++ = e2[(t2 & 0x0F) << 2]; + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + // *out++ = '\n'; ==> no newline at the end of the output + } + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + } + } + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + if (use_padding) { + *out++ = '='; + } + } + } + return (size_t)(out - dst); +} + +// Returns the number of bytes written. The destination buffer must be large +// enough. It will add padding (=) if needed. +inline simdutf_constexpr23 size_t tail_encode_base64(char *dst, const char *src, + size_t srclen, + base64_options options) { + return tail_encode_base64_impl(dst, src, srclen, options); +} + +template +simdutf_warn_unused simdutf_constexpr23 size_t +maximal_binary_length_from_base64(InputPtr input, size_t length) noexcept { + // We process the padding characters ('=') at the end to make sure + // that we return an exact result when the input has no ignorable characters + // (e.g., spaces). + size_t padding = 0; + if (length > 0) { + if (input[length - 1] == '=') { + padding++; + if (length > 1 && input[length - 2] == '=') { + padding++; + } + } + } + // The input is not otherwise processed for ignorable characters or + // validation, so that the function runs in constant time (very fast). In + // practice, base64 inputs without ignorable characters are common and the + // common case are line separated inputs with relatively long lines (e.g., 76 + // characters) which leads this function to a slight (1%) overestimation of + // the output size. + // + // Of course, some inputs might contain an arbitrary number of spaces or + // newlines, which would make this function return a very pessimistic output + // size but systems that produce base64 outputs typically do not do that and + // if they do, they do not care much about minimizing memory usage. + // + // In specialized applications, users may know that their input is line + // separated, which can be checked very quickly by by iterating (e.g., over 76 + // character chunks, looking for the linefeed characters only). We could + // provide a specialized function for that, but it is not clear that the added + // complexity is worth it for us. + // + size_t actual_length = length - padding; + if (actual_length % 4 <= 1) { + return actual_length / 4 * 3; + } + // if we have a valid input, then the remainder must be 2 or 3 adding one or + // two extra bytes. + return actual_length / 4 * 3 + (actual_length % 4) - 1; +} + +template +simdutf_warn_unused simdutf_constexpr23 full_result +base64_to_binary_details_impl( + const char_type *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) noexcept { + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage) || + (options == base64_options::base64_default_or_url_accept_garbage); + auto ri = simdutf::scalar::base64::find_end(input, length, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + length = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (length == 0) { + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + full_result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, + full_input_length, last_chunk_options); + if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + equalsigns > 0 && !ignore_garbage) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + // When is_partial(last_chunk_options) is true, we must either end with + // the end of the stream (beyond whitespace) or right after a non-ignorable + // character or at the very beginning of the stream. + // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + r.input_count < full_input_length) { + // First check if we can extend the input to the end of the stream + while (r.input_count < full_input_length && + base64_ignorable(*(input + r.input_count), options)) { + r.input_count++; + } + // If we are still not at the end of the stream, then we must backtrack + // to the last non-ignorable character. + if (r.input_count < full_input_length) { + while (r.input_count > 0 && + base64_ignorable(*(input + r.input_count - 1), options)) { + r.input_count--; + } + } + } + return r; +} + +template +simdutf_constexpr23 simdutf_warn_unused full_result +base64_to_binary_details_safe_impl( + const char_type *input, size_t length, char *output, size_t outlen, + base64_options options, + last_chunk_handling_options last_chunk_options) noexcept { + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage) || + (options == base64_options::base64_default_or_url_accept_garbage); + auto ri = simdutf::scalar::base64::find_end(input, length, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + length = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (length == 0) { + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + full_result r = scalar::base64::base64_tail_decode_safe( + output, outlen, input, length, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, + full_input_length, last_chunk_options); + if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + equalsigns > 0 && !ignore_garbage) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + + // When is_partial(last_chunk_options) is true, we must either end with + // the end of the stream (beyond whitespace) or right after a non-ignorable + // character or at the very beginning of the stream. + // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + r.input_count < full_input_length) { + // First check if we can extend the input to the end of the stream + while (r.input_count < full_input_length && + base64_ignorable(*(input + r.input_count), options)) { + r.input_count++; + } + // If we are still not at the end of the stream, then we must backtrack + // to the last non-ignorable character. + if (r.input_count < full_input_length) { + while (r.input_count > 0 && + base64_ignorable(*(input + r.input_count - 1), options)) { + r.input_count--; + } + } + } + return r; +} + +simdutf_warn_unused simdutf_constexpr23 size_t +base64_length_from_binary(size_t length, base64_options options) noexcept { + // By default, we use padding if we are not using the URL variant. + // This is check with ((options & base64_url) == 0) which returns true if we + // are not using the URL variant. However, we also allow 'inversion' of the + // convention with the base64_reverse_padding option. If the + // base64_reverse_padding option is set, we use padding if we are using the + // URL variant, and we omit it if we are not using the URL variant. This is + // checked with + // ((options & base64_reverse_padding) == base64_reverse_padding). + bool use_padding = + ((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding); + if (!use_padding) { + return length / 3 * 4 + ((length % 3) ? (length % 3) + 1 : 0); + } + return (length + 2) / 3 * + 4; // We use padding to make the length a multiple of 4. } - #endif // SIMDUTF_SPAN -/** - * Given a valid UTF-16LE string having a possibly truncated last character, - * this function checks the end of string. If the last character is truncated - * (or partial), then it returns a shorter length (shorter by 1 unit) so that - * the short UTF-16LE strings only contain complete characters. If there is no - * truncated character, the original length is returned. - * - * This function assumes that the input string is valid UTF-16LE, but possibly - * truncated. - * - * @param input the UTF-16LE string to process - * @param length the length of the string in bytes - * @return the length of the string in unit, possibly shorter by 1 unit - */ -simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, - size_t length); - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -trim_partial_utf16le(std::span valid_utf16_input) noexcept { - return trim_partial_utf16le(valid_utf16_input.data(), - valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN +simdutf_warn_unused simdutf_constexpr23 size_t +base64_length_from_binary_with_lines(size_t length, base64_options options, + size_t line_length) noexcept { + if (length == 0) { + return 0; + } + size_t base64_length = + scalar::base64::base64_length_from_binary(length, options); + if (line_length < 4) { + line_length = 4; + } + size_t lines = + (base64_length + line_length - 1) / line_length; // number of lines + return base64_length + lines - 1; +} + +// Return the length of the prefix that contains count base64 characters. +// Thus, if count is 3, the function returns the length of the prefix +// that contains 3 base64 characters. +// The function returns (size_t)-1 if there is not enough base64 characters in +// the input. +template +simdutf_warn_unused size_t prefix_length(size_t count, + simdutf::base64_options options, + const char_type *input, + size_t length) noexcept { + size_t i = 0; + while (i < length && is_ignorable(input[i], options)) { + i++; + } + if (count == 0) { + return i; // duh! + } + for (; i < length; i++) { + if (is_ignorable(input[i], options)) { + continue; + } + // We have a base64 character or a padding character. + count--; + if (count == 0) { + return i + 1; + } + } + simdutf_log_assert(false, "You never get here"); -/** - * Given a valid UTF-16 string having a possibly truncated last character, - * this function checks the end of string. If the last character is truncated - * (or partial), then it returns a shorter length (shorter by 1 unit) so that - * the short UTF-16 strings only contain complete characters. If there is no - * truncated character, the original length is returned. - * - * This function assumes that the input string is valid UTF-16, but possibly - * truncated. We use the native endianness. - * - * @param input the UTF-16 string to process - * @param length the length of the string in bytes - * @return the length of the string in unit, possibly shorter by 1 unit - */ -simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, - size_t length); - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -trim_partial_utf16(std::span valid_utf16_input) noexcept { - return trim_partial_utf16(valid_utf16_input.data(), valid_utf16_input.size()); + return -1; // should never happen } - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_BASE64 - #ifndef SIMDUTF_NEED_TRAILING_ZEROES - #define SIMDUTF_NEED_TRAILING_ZEROES 1 - #endif -// base64_options are used to specify the base64 encoding options. -// ASCII spaces are ' ', '\t', '\n', '\r', '\f' -// garbage characters are characters that are not part of the base64 alphabet -// nor ASCII spaces. -constexpr uint64_t base64_reverse_padding = - 2; /* modifier for base64_default and base64_url */ -enum base64_options : uint64_t { - base64_default = 0, /* standard base64 format (with padding) */ - base64_url = 1, /* base64url format (no padding) */ - base64_default_no_padding = - base64_default | - base64_reverse_padding, /* standard base64 format without padding */ - base64_url_with_padding = - base64_url | base64_reverse_padding, /* base64url with padding */ - base64_default_accept_garbage = - 4, /* standard base64 format accepting garbage characters, the input stops - with the first '=' if any */ - base64_url_accept_garbage = - 5, /* base64url format accepting garbage characters, the input stops with - the first '=' if any */ - base64_default_or_url = - 8, /* standard/base64url hybrid format (only meaningful for decoding!) */ - base64_default_or_url_accept_garbage = - 12, /* standard/base64url hybrid format accepting garbage characters - (only meaningful for decoding!), the input stops with the first '=' - if any */ -}; +} // namespace base64 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/base64.h */ + +namespace simdutf { #if SIMDUTF_CPLUSPLUS17 inline std::string_view to_string(base64_options options) { @@ -4144,23 +10647,6 @@ inline std::string_view to_string(base64_options options) { } #endif // SIMDUTF_CPLUSPLUS17 -// last_chunk_handling_options are used to specify the handling of the last -// chunk in base64 decoding. -// https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 -enum last_chunk_handling_options : uint64_t { - loose = 0, /* standard base64 format, decode partial final chunk */ - strict = 1, /* error when the last chunk is partial, 2 or 3 chars, and - unpadded, or non-zero bit padding */ - stop_before_partial = - 2, /* if the last chunk is partial, ignore it (no error) */ - only_full_chunks = - 3 /* only decode full blocks (4 base64 characters, no padding) */ -}; - -inline bool is_partial(last_chunk_handling_options options) { - return (options == stop_before_partial) || (options == only_full_chunks); -} - #if SIMDUTF_CPLUSPLUS17 inline std::string_view to_string(last_chunk_handling_options options) { switch (options) { @@ -4193,11 +10679,19 @@ inline std::string_view to_string(last_chunk_handling_options options) { simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t maximal_binary_length_from_base64( const detail::input_span_of_byte_like auto &input) noexcept { - return maximal_binary_length_from_base64( - reinterpret_cast(input.data()), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::maximal_binary_length_from_base64( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return maximal_binary_length_from_base64( + reinterpret_cast(input.data()), input.size()); + } } #endif // SIMDUTF_SPAN @@ -4218,9 +10712,17 @@ maximal_binary_length_from_base64( simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *input, size_t length) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t maximal_binary_length_from_base64(std::span input) noexcept { - return maximal_binary_length_from_base64(input.data(), input.size()); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::maximal_binary_length_from_base64(input.data(), + input.size()); + } else + #endif + { + return maximal_binary_length_from_base64(input.data(), input.size()); + } } #endif // SIMDUTF_SPAN @@ -4283,15 +10785,25 @@ simdutf_warn_unused result base64_to_binary( base64_options options = base64_default, last_chunk_handling_options last_chunk_options = loose) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result base64_to_binary( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +base64_to_binary( const detail::input_span_of_byte_like auto &input, detail::output_span_of_byte_like auto &&binary_output, base64_options options = base64_default, last_chunk_handling_options last_chunk_options = loose) noexcept { - return base64_to_binary(reinterpret_cast(input.data()), - input.size(), - reinterpret_cast(binary_output.data()), - options, last_chunk_options); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::base64_to_binary_details_impl( + input.data(), input.size(), binary_output.data(), options, + last_chunk_options); + } else + #endif + { + return base64_to_binary(reinterpret_cast(input.data()), + input.size(), + reinterpret_cast(binary_output.data()), + options, last_chunk_options); + } } #endif // SIMDUTF_SPAN @@ -4301,8 +10813,10 @@ simdutf_really_inline simdutf_warn_unused result base64_to_binary( * @param length the length of the input in bytes * @return number of base64 bytes */ -simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options = base64_default) noexcept; +inline simdutf_warn_unused simdutf_constexpr23 size_t base64_length_from_binary( + size_t length, base64_options options = base64_default) noexcept { + return scalar::base64::base64_length_from_binary(length, options); +} /** * Provide the base64 length in bytes given the length of a binary input, @@ -4313,9 +10827,13 @@ simdutf_warn_unused size_t base64_length_from_binary( * interpreted as 4), * @return number of base64 bytes */ -simdutf_warn_unused size_t base64_length_from_binary_with_lines( +inline simdutf_warn_unused simdutf_constexpr23 size_t +base64_length_from_binary_with_lines( size_t length, base64_options options = base64_default, - size_t line_length = default_line_length) noexcept; + size_t line_length = default_line_length) noexcept { + return scalar::base64::base64_length_from_binary_with_lines(length, options, + line_length); +} /** * Convert a binary input to a base64 output. @@ -4341,13 +10859,21 @@ simdutf_warn_unused size_t base64_length_from_binary_with_lines( size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options = base64_default) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t binary_to_base64(const detail::input_span_of_byte_like auto &input, detail::output_span_of_byte_like auto &&binary_output, base64_options options = base64_default) noexcept { - return binary_to_base64( - reinterpret_cast(input.data()), input.size(), - reinterpret_cast(binary_output.data()), options); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::tail_encode_base64( + binary_output.data(), input.data(), input.size(), options); + } else + #endif + { + return binary_to_base64( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(binary_output.data()), options); + } } #endif // SIMDUTF_SPAN @@ -4380,14 +10906,23 @@ binary_to_base64_with_lines(const char *input, size_t length, char *output, size_t line_length = simdutf::default_line_length, base64_options options = base64_default) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t binary_to_base64_with_lines( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +binary_to_base64_with_lines( const detail::input_span_of_byte_like auto &input, detail::output_span_of_byte_like auto &&binary_output, size_t line_length = simdutf::default_line_length, base64_options options = base64_default) noexcept { - return binary_to_base64_with_lines( - reinterpret_cast(input.data()), input.size(), - reinterpret_cast(binary_output.data()), line_length, options); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::tail_encode_base64_impl( + binary_output.data(), input.data(), input.size(), options, line_length); + } else + #endif + { + return binary_to_base64_with_lines( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(binary_output.data()), line_length, options); + } } #endif // SIMDUTF_SPAN @@ -4510,32 +11045,46 @@ base64_to_binary(const char16_t *input, size_t length, char *output, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) noexcept; #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result base64_to_binary( +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +base64_to_binary( std::span input, detail::output_span_of_byte_like auto &&binary_output, base64_options options = base64_default, last_chunk_handling_options last_chunk_options = loose) noexcept { - return base64_to_binary(input.data(), input.size(), - reinterpret_cast(binary_output.data()), - options, last_chunk_options); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::base64_to_binary_details_impl( + input.data(), input.size(), binary_output.data(), options, + last_chunk_options); + } else + #endif + { + return base64_to_binary(input.data(), input.size(), + reinterpret_cast(binary_output.data()), + options, last_chunk_options); + } } #endif // SIMDUTF_SPAN /** - * Check if a character is an ignorabl base64 character. + * Check if a character is an ignorable base64 character. * Checking a large input, character by character, is not computationally * efficient. * * @param input the character to check * @param options the base64 options to use, is base64_default by default. - * @return true if the character is an ignorablee base64 character, false + * @return true if the character is an ignorable base64 character, false * otherwise. */ -simdutf_warn_unused bool -base64_ignorable(char input, base64_options options = base64_default) noexcept; -simdutf_warn_unused bool +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool +base64_ignorable(char input, base64_options options = base64_default) noexcept { + return scalar::base64::is_ignorable(input, options); +} +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool base64_ignorable(char16_t input, - base64_options options = base64_default) noexcept; + base64_options options = base64_default) noexcept { + return scalar::base64::is_ignorable(input, options); +} /** * Check if a character is a valid base64 character. @@ -4548,10 +11097,14 @@ base64_ignorable(char16_t input, * @param options the base64 options to use, is base64_default by default. * @return true if the character is a base64 character, false otherwise. */ -simdutf_warn_unused bool -base64_valid(char input, base64_options options = base64_default) noexcept; -simdutf_warn_unused bool -base64_valid(char16_t input, base64_options options = base64_default) noexcept; +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool +base64_valid(char input, base64_options options = base64_default) noexcept { + return scalar::base64::is_base64(input, options); +} +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool +base64_valid(char16_t input, base64_options options = base64_default) noexcept { + return scalar::base64::is_base64(input, options); +} /** * Check if a character is a valid base64 character or the padding character @@ -4562,12 +11115,16 @@ base64_valid(char16_t input, base64_options options = base64_default) noexcept; * @param options the base64 options to use, is base64_default by default. * @return true if the character is a base64 character, false otherwise. */ -simdutf_warn_unused bool +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool base64_valid_or_padding(char input, - base64_options options = base64_default) noexcept; -simdutf_warn_unused bool + base64_options options = base64_default) noexcept { + return scalar::base64::is_base64_or_padding(input, options); +} +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool base64_valid_or_padding(char16_t input, - base64_options options = base64_default) noexcept; + base64_options options = base64_default) noexcept { + return scalar::base64::is_base64_or_padding(input, options); +} /** * Convert a base64 input to a binary output. @@ -4642,25 +11199,7 @@ base64_to_binary_safe(const char *input, size_t length, char *output, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose, bool decode_up_to_bad_char = false) noexcept; - #if SIMDUTF_SPAN -/** - * @brief span overload - * @return a tuple of result and outlen - */ -simdutf_really_inline simdutf_warn_unused std::tuple -base64_to_binary_safe(const detail::input_span_of_byte_like auto &input, - detail::output_span_of_byte_like auto &&binary_output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = loose, - bool decode_up_to_bad_char = false) noexcept { - size_t outlen = binary_output.size(); - auto r = base64_to_binary_safe( - reinterpret_cast(input.data()), input.size(), - reinterpret_cast(binary_output.data()), outlen, options, - last_chunk_options, decode_up_to_bad_char); - return {r, outlen}; -} - #endif // SIMDUTF_SPAN +// the span overload has moved to the bottom of the file simdutf_warn_unused result base64_to_binary_safe(const char16_t *input, size_t length, char *output, @@ -4668,25 +11207,7 @@ base64_to_binary_safe(const char16_t *input, size_t length, char *output, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose, bool decode_up_to_bad_char = false) noexcept; - #if SIMDUTF_SPAN -/** - * @brief span overload - * @return a tuple of result and outlen - */ -simdutf_really_inline simdutf_warn_unused std::tuple -base64_to_binary_safe(std::span input, - detail::output_span_of_byte_like auto &&binary_output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = loose, - bool decode_up_to_bad_char = false) noexcept { - size_t outlen = binary_output.size(); - auto r = base64_to_binary_safe(input.data(), input.size(), - reinterpret_cast(binary_output.data()), - outlen, options, last_chunk_options, - decode_up_to_bad_char); - return {r, outlen}; -} - #endif // SIMDUTF_SPAN + // span overload moved to bottom of file #if SIMDUTF_ATOMIC_REF /** @@ -4780,20 +11301,6 @@ atomic_base64_to_binary_safe( #endif // SIMDUTF_SPAN #endif // SIMDUTF_ATOMIC_REF -/** - * Find the first occurrence of a character in a string. If the character is - * not found, return a pointer to the end of the string. - * @param start the start of the string - * @param end the end of the string - * @param character the character to find - * @return a pointer to the first occurrence of the character in the string, - * or a pointer to the end of the string if the character is not found. - * - */ -simdutf_warn_unused const char *find(const char *start, const char *end, - char character) noexcept; -simdutf_warn_unused const char16_t * -find(const char16_t *start, const char16_t *end, char16_t character) noexcept; #endif // SIMDUTF_FEATURE_BASE64 /** @@ -6489,6 +12996,7 @@ class implementation { base64_options options = base64_default, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept = 0; + /** * Convert a base64 input to a binary output. * @@ -6630,6 +13138,7 @@ class implementation { const char *input, size_t length, char *output, size_t line_length = simdutf::default_line_length, base64_options options = base64_default) const noexcept = 0; + /** * Find the first occurrence of a character in a string. If the character is * not found, return a pointer to the end of the string. @@ -6811,6 +13320,246 @@ get_active_implementation(); } // namespace simdutf +#if SIMDUTF_FEATURE_BASE64 + // this header is not part of the public api +/* begin file include/simdutf/base64_implementation.h */ +#ifndef SIMDUTF_BASE64_IMPLEMENTATION_H +#define SIMDUTF_BASE64_IMPLEMENTATION_H + +// this is not part of the public api + +namespace simdutf { + +template +simdutf_warn_unused simdutf_constexpr23 result slow_base64_to_binary_safe_impl( + const chartype *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_options) noexcept { + const bool ignore_garbage = (options & base64_default_accept_garbage) != 0; + auto ri = simdutf::scalar::base64::find_end(input, length, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + length = ri.srclen; + size_t full_input_length = ri.full_input_length; + (void)full_input_length; + if (length == 0) { + outlen = 0; + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + return {SUCCESS, 0}; + } + + // The parameters of base64_tail_decode_safe are: + // - dst: the output buffer + // - outlen: the size of the output buffer + // - srcr: the input buffer + // - length: the size of the input buffer + // - padded_characters: the number of padding characters + // - options: the options for the base64 decoder + // - last_chunk_options: the options for the last chunk + // The function will return the number of bytes written to the output buffer + // and the number of bytes read from the input buffer. + // The function will also return an error code if the input buffer is not + // valid base64. + full_result r = scalar::base64::base64_tail_decode_safe( + output, outlen, input, length, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, + full_input_length, last_chunk_options); + outlen = r.output_count; + if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + equalsigns > 0) { + // additional checks + if ((outlen % 3 == 0) || ((outlen % 3) + 1 + equalsigns != 4)) { + r.error = error_code::INVALID_BASE64_CHARACTER; + } + } + return {r.error, r.input_count}; // we cannot return r itself because it gets + // converted to error/output_count +} + +template +simdutf_warn_unused simdutf_constexpr23 result base64_to_binary_safe_impl( + const chartype *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options, + bool decode_up_to_bad_char) noexcept { + static_assert(std::is_same::value || + std::is_same::value, + "Only char and char16_t are supported."); + size_t remaining_input_length = length; + size_t remaining_output_length = outlen; + size_t input_position = 0; + size_t output_position = 0; + + // We also do a first pass using the fast path to decode as much as possible + size_t safe_input = (std::min)( + remaining_input_length, + base64_length_from_binary(remaining_output_length / 3 * 3, options)); + bool done_with_partial = (safe_input == remaining_input_length); + simdutf::full_result r; + +#if SIMDUTF_CPLUSPLUS23 + if consteval { + r = scalar::base64::base64_to_binary_details_impl( + input + input_position, safe_input, output + output_position, options, + done_with_partial + ? last_chunk_handling_options + : simdutf::last_chunk_handling_options::only_full_chunks); + } else +#endif + { + r = get_active_implementation()->base64_to_binary_details( + input + input_position, safe_input, output + output_position, options, + done_with_partial + ? last_chunk_handling_options + : simdutf::last_chunk_handling_options::only_full_chunks); + } + simdutf_log_assert(r.input_count <= safe_input, + "You should not read more than safe_input"); + simdutf_log_assert(r.output_count <= remaining_output_length, + "You should not write more than remaining_output_length"); + // Technically redundant, but we want to be explicit about it. + input_position += r.input_count; + output_position += r.output_count; + remaining_input_length -= r.input_count; + remaining_output_length -= r.output_count; + if (r.error != simdutf::error_code::SUCCESS) { + // There is an error. We return. + if (decode_up_to_bad_char && + r.error == error_code::INVALID_BASE64_CHARACTER) { + return slow_base64_to_binary_safe_impl( + input, length, output, outlen, options, last_chunk_handling_options); + } + outlen = output_position; + return {r.error, input_position}; + } + + if (done_with_partial) { + // We are done. We have decoded everything. + outlen = output_position; + return {simdutf::error_code::SUCCESS, input_position}; + } + // We have decoded some data, but we still have some data to decode. + // We need to decode the rest of the input buffer. + r = simdutf::scalar::base64::base64_to_binary_details_safe_impl( + input + input_position, remaining_input_length, output + output_position, + remaining_output_length, options, last_chunk_handling_options); + input_position += r.input_count; + output_position += r.output_count; + remaining_input_length -= r.input_count; + remaining_output_length -= r.output_count; + + if (r.error != simdutf::error_code::SUCCESS) { + // There is an error. We return. + if (decode_up_to_bad_char && + r.error == error_code::INVALID_BASE64_CHARACTER) { + return slow_base64_to_binary_safe_impl( + input, length, output, outlen, options, last_chunk_handling_options); + } + outlen = output_position; + return {r.error, input_position}; + } + if (input_position < length) { + // We cannot process the entire input in one go, so we need to + // process it in two steps: first the fast path, then the slow path. + // In some cases, the processing might 'eat up' trailing ignorable + // characters in the fast path, but that can be a problem. + // suppose we have just white space followed by a single base64 character. + // If we first process the white space with the fast path, it will + // eat all of it. But, by the JavaScript standard, we should consume + // no character. See + // https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + while (input_position > 0 && + base64_ignorable(input[input_position - 1], options)) { + input_position--; + } + } + outlen = output_position; + return {simdutf::error_code::SUCCESS, input_position}; +} + +} // namespace simdutf +#endif // SIMDUTF_BASE64_IMPLEMENTATION_H +/* end file include/simdutf/base64_implementation.h */ + +namespace simdutf { + #if SIMDUTF_SPAN +/** + * @brief span overload + * @return a tuple of result and outlen + */ +simdutf_really_inline + simdutf_constexpr23 simdutf_warn_unused std::tuple + base64_to_binary_safe( + const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose, + bool decode_up_to_bad_char = false) noexcept { + size_t outlen = binary_output.size(); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + using CInput = std::decay_t; + static_assert(std::is_same_v, + "sorry, the constexpr implementation is for now limited to " + "input of type char"); + using COutput = std::decay_t; + static_assert(std::is_same_v, + "sorry, the constexpr implementation is for now limited to " + "output of type char"); + auto r = base64_to_binary_safe_impl( + input.data(), input.size(), binary_output.data(), outlen, options, + last_chunk_options, decode_up_to_bad_char); + return {r, outlen}; + } else + #endif + { + auto r = base64_to_binary_safe_impl( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(binary_output.data()), outlen, options, + last_chunk_options, decode_up_to_bad_char); + return {r, outlen}; + } +} + + #if SIMDUTF_SPAN +/** + * @brief span overload + * @return a tuple of result and outlen + */ +simdutf_really_inline + simdutf_warn_unused simdutf_constexpr23 std::tuple + base64_to_binary_safe( + std::span input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose, + bool decode_up_to_bad_char = false) noexcept { + size_t outlen = binary_output.size(); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + auto r = base64_to_binary_safe_impl( + input.data(), input.size(), binary_output.data(), outlen, options, + last_chunk_options, decode_up_to_bad_char); + return {r, outlen}; + } else + #endif + { + auto r = base64_to_binary_safe( + input.data(), input.size(), + reinterpret_cast(binary_output.data()), outlen, options, + last_chunk_options, decode_up_to_bad_char); + return {r, outlen}; + } +} + #endif // SIMDUTF_SPAN + + #endif // SIMDUTF_SPAN +} // namespace simdutf + +#endif // SIMDUTF_FEATURE_BASE64 + #endif // SIMDUTF_IMPLEMENTATION_H /* end file include/simdutf/implementation.h */