diff --git a/tests/Makefile b/tests/Makefile index 3e57d3f..5a0cfbc 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -40,6 +40,10 @@ test:: chi2.exe @echo "Testing random number generation..." @if ./chi2.exe; then echo "chi2: passed"; else echo "chi2: FAILED"; exit 2; fi +test:: tobytes.exe + @echo "Testing tobytes..." + @if ./tobytes.exe | cmp -s tobytes.output - ; then echo "tobytes: passed"; else echo "tobytes: FAILED"; exit 2; fi + bench:: timings.exe ./timings.exe diff --git a/tests/tobytes.ml b/tests/tobytes.ml new file mode 100644 index 0000000..4b50bcf --- /dev/null +++ b/tests/tobytes.ml @@ -0,0 +1,54 @@ +open Printf + +let test_cases = [ + (Z.zero, None, `Big, false, ""); + (Z.zero, Some 2, `Big, false, "\x00\x00"); + (Z.zero, Some 2, `Little, true, "\x00\x00"); + (Z.one, None, `Big, false, "\x01"); + (Z.one, None, `Little, false, "\x01"); + (Z.one, None, `Big, true, "\x01"); + (Z.one, None, `Little, true, "\x01"); + (Z.of_int 127, None, `Little, true, "\x7F"); + (Z.of_int (-128), None, `Little, true, "\x80"); + (Z.of_int 0xABCD, None, `Big, false, "\xAB\xCD"); + (Z.of_int 0xABCD, None, `Little, false, "\xCD\xAB"); + (Z.of_int 0xABCD, Some 4, `Big, false, "\x00\x00\xAB\xCD"); + (Z.of_int 0xABCD, Some 4, `Little, false, "\xCD\xAB\x00\x00"); + (Z.of_int (-1234), None, `Big, true, "\xFB\x2E"); + (Z.of_int (-1234), None, `Little, true, "\x2E\xFB"); + (Z.of_int (-1234), Some 4, `Big, true, "\xFF\xFF\xFB\x2E"); + (Z.of_int (-1234), Some 4, `Little, true, "\x2E\xFB\xFF\xFF"); + (Z.of_int 0xABCD, None, `Big, true, "\x00\xAB\xCD"); + (Z.of_int 0xABCD, None, `Little, true, "\xCD\xAB\x00"); + (Z.of_string "0x123456789ABCDEF0123456789ABCDEF", None, `Big, false, + "\x01\x23\x45\x67\x89\xAB\xCD\xEF\x01\x23\x45\x67\x89\xAB\xCD\xEF"); + (Z.of_string "0x123456789ABCDEF0123456789ABCDEF", None, `Little, false, + "\xEF\xCD\xAB\x89\x67\x45\x23\x01\xEF\xCD\xAB\x89\x67\x45\x23\x01"); + (Z.of_string "0x123456789ABCDEF0123456789ABCDEF", Some 18, `Big, false, + 
"\x00\x00\x01\x23\x45\x67\x89\xAB\xCD\xEF\x01\x23\x45\x67\x89\xAB\xCD\xEF") +] + +let failure_cases = [ + (Z.of_int 0xABCD, 1, false); + (Z.of_int 0xABCD, 2, true); + (Z.of_int (-1), 1, false); + (Z.of_int (-1234), 1, true); + (Z.of_string "0x123456789ABCDEF0123456789ABCDEF", 14, false) +] + +let _ = + List.iteri + (fun i (x, len, endian, signed, res) -> + let s = Z.to_bytes ?len ~endian ~signed x in + if s <> res + then printf "Test #%d: failed\n" i; + if Z.of_bytes ~endian ~signed res <> x + then printf "Test #%d: round-trip failed\n" i) + test_cases; + List.iteri + (fun i (x, len, signed) -> + try + ignore (Z.to_bytes ~len ~endian:`Big ~signed x); + printf "Failure test #%d did not fail!\n" i + with Invalid_argument _ | Z.Overflow -> ()) + failure_cases diff --git a/tests/tobytes.output b/tests/tobytes.output new file mode 100644 index 0000000..e69de29 diff --git a/z.ml b/z.ml index 58faadb..77d0d99 100644 --- a/z.ml +++ b/z.ml @@ -522,6 +522,51 @@ let random_int_gen ~fill bound = let random_bits_gen ~fill nbits = random_bits_aux (raw_bits_from_bytes ~fill) nbits +(* Conversions Z.t <-> byte sequence *) + +let string_init len (f: int -> int) = + String.init len (fun i -> Char.unsafe_chr (f i)) + +let to_bytes ?len ~(endian:[`Big|`Little]) ?(signed = false) x = + let (y, mask) = + if sign x >= 0 then (x, 0x00) + else if signed then (pred (neg x), 0xFF) + else invalid_arg "Z.to_bytes" in + (* Signed representation needs one extra bit for the sign *) + let nb = + numbits y + (if signed then 1 else 0) in + let l = + match len with + | None -> (nb + 7) / 8 + | Some len -> if nb <= len * 8 then len else raise Overflow in + let s = to_bits y in + let get_byte i = + let j = + match endian with + | `Little -> i + | `Big -> l - 1 - i in + if j >= 0 && j < String.length s + then String.get_uint8 s j lxor mask + else mask in + string_init l get_byte + +let of_bytes ~(endian:[`Big|`Little]) ?(signed = false) s = + let l = String.length s in + let sign_byte = + if l = 0 + 
then '\x00' + else s.[match endian with `Little -> l - 1 | `Big -> 0] in + let mask = + if signed && Char.code sign_byte land 0x80 = 0x80 then 0xFF else 0x00 in + let get_byte i = + let j = + match endian with + | `Little -> i + | `Big -> l - 1 - i in + String.get_uint8 s j lxor mask in + let x = of_bits (string_init l get_byte) in + if mask = 0 then x else pred (neg x) + (* Infix notations *) let (~-) = neg diff --git a/z.mli b/z.mli index 1794942..83fdd49 100644 --- a/z.mli +++ b/z.mli @@ -698,12 +698,62 @@ val signed_extract: t -> int -> int -> t Raises an [Invalid_argument] if [off] is strictly negative, or if [len] is negative or null. *) +val to_bytes: + ?len: int -> endian:[`Big|`Little] -> ?signed: bool -> t -> string +(** [to_bytes ~endian x] returns a binary representation of the integer [x] + as a sequence of bytes. + @param len the desired length (number of bytes) for the result. + If [len] is not provided, the smallest number of bytes needed + to represent [x] is used. + If [len] is provided and [x] cannot be represented in [len] bytes, + the exception [Overflow] is raised. + @param endian the desired endianness for the result: + [~endian:`Big] for big-endian (most significant byte first); + [~endian:`Little] for little-endian (least significant byte first). + @param signed whether negative numbers are allowed and should be represented + in two's complement. If [signed] is [false] and [x] is negative, + an [Invalid_argument] exception is raised. + If [signed] is not provided, it defaults to [false]. + @raise Overflow if [x] cannot be represented in [len] bytes. + @raise Invalid_argument if [x] is negative and [signed] is not [true]. + @since 1.15 +*) + +val of_bytes: endian:[`Big|`Little] -> ?signed: bool -> string -> t +(** [of_bytes ~endian s] returns the integer represented by the string [s], + which is treated as a sequence of bytes. + + The following round-trip properties hold: + - [of_bytes ~endian (to_bytes ~endian x) = x] if [x] is nonnegative. + 
- [of_bytes ~endian ~signed:true (to_bytes ~endian ~signed:true x) = x] + for all integers [x]. + + The properties still hold if [to_bytes] is used with an explicit [len] + parameter, provided [to_bytes] does not fail with an [Overflow] exception. + + @param endian the endianness for the byte sequence [s]: + [~endian:`Big] for big-endian (most significant byte first); + [~endian:`Little] for little-endian (least significant byte first). + @param signed whether the byte sequence should be read in two's complement. + If [signed] is [true], the most significant bit of [s] is treated + as the sign bit: the result is a negative integer if the sign bit + is 1, and a nonnegative integer if the sign bit is 0. + If [signed] is [false], which is the default, the most significant bit + of [s] has no special meaning. The result is always nonnegative. + @since 1.15 + +*) + external to_bits: t -> string = "ml_z_to_bits" (** Returns a binary representation of the argument. The string result should be interpreted as a sequence of bytes, corresponding to the binary representation of the absolute value of the argument in little endian ordering. The sign is not stored in the string. + The string can contain redundant trailing zero bytes. + + {!Z.to_bytes} can be used instead of [to_bits] + to get better control over endianness, signedness and byte size. *) external of_bits: string -> t = "ml_z_of_bits" @@ -713,6 +763,9 @@ external of_bits: string -> t = "ml_z_of_bits" We have the identity: [of_bits (to_bits x) = abs x]. However, we can have [to_bits (of_bits s) <> s] due to the presence of trailing zeros in s. + + {!Z.of_bytes} can be used instead of [of_bits] + to get better control over endianness and signedness. *) (** {1 Pseudo-random number generation} *)