forked from gigamonkey/monkeylib-binary-data
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcommon-datatypes.lisp
More file actions
128 lines (107 loc) · 4.57 KB
/
common-datatypes.lisp
File metadata and controls
128 lines (107 loc) · 4.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
;;
;; Copyright (c) 2005, Gigamonkeys Consulting All rights reserved.
;;
(in-package :com.gigamonkeys.binary-data.common-datatypes)
(define-binary-type unsigned-integer (bytes bits-per-byte)
(:reader (in)
(loop with value = 0
for low-bit downfrom (* bits-per-byte (1- bytes)) to 0 by bits-per-byte do
(setf (ldb (byte bits-per-byte low-bit) value) (read-byte in))
finally (return value)))
(:writer (out value)
(loop for low-bit downfrom (* bits-per-byte (1- bytes)) to 0 by bits-per-byte
do (write-byte (ldb (byte bits-per-byte low-bit) value) out))))
(define-binary-type u1 () (unsigned-integer :bytes 1 :bits-per-byte 8))
(define-binary-type u2 () (unsigned-integer :bytes 2 :bits-per-byte 8))
(define-binary-type u3 () (unsigned-integer :bytes 3 :bits-per-byte 8))
(define-binary-type u4 () (unsigned-integer :bytes 4 :bits-per-byte 8))
;;; Strings
(define-binary-type generic-string (length character-type)
(:reader (in)
(let ((string (make-string length)))
(dotimes (i length)
(setf (char string i) (read-value character-type in)))
string))
(:writer (out string)
(dotimes (i length)
(write-value character-type out (char string i)))))
(define-binary-type generic-terminated-string (terminator character-type)
(:reader (in)
(with-output-to-string (s)
(loop for char = (read-value character-type in)
until (char= char terminator) do (write-char char s))))
(:writer (out string)
(loop for char across string
do (write-value character-type out char)
finally (write-value character-type out terminator))))
;;; ISO-8859-1 strings
(define-binary-type iso-8859-1-char ()
(:reader (in)
(let ((code (read-byte in)))
(or (code-char code)
(error "Character code ~d not supported" code))))
(:writer (out char)
(let ((code (char-code char)))
(if (<= 0 code #xff)
(write-byte code out)
(error "Illegal character for iso-8859-1 encoding: character: ~c with code: ~d" char code)))))
(define-binary-type iso-8859-1-string (length)
(generic-string :length length :character-type 'iso-8859-1-char))
(define-binary-type iso-8859-1-terminated-string (terminator)
(generic-terminated-string :terminator terminator :character-type 'iso-8859-1-char))
;;; UCS-2 (Unicode) strings (i.e. UTF-16 without surrogate pairs, phew.)
;;; Define a binary type for reading a UCS-2 character relative to a
;;; particular byte ordering as indicated by the BOM value.
;; v2.3 specifies that the BOM should be present. v2.2 is silent
;; though it is arguably inherent in the definition of UCS-2) Length
;; is in bytesty. On the write side, since we don't have any way of
;; knowing what BOM was used to read the string we just pick one.
;; This does mean roundtrip transparency could be broken.
(define-binary-type ucs-2-char (swap)
(:reader (in)
(let ((code (read-value 'u2 in)))
(when swap (setf code (swap-bytes code)))
(or (code-char code) (error "Character code ~d not supported" code))))
(:writer (out char)
(let ((code (char-code char)))
(unless (<= 0 code #xffff)
(error "Illegal character for ucs-2 encoding: ~c with char-code: ~d" char code))
(when swap (setf code (swap-bytes code)))
(write-value 'u2 out code))))
(defun swap-bytes (code)
(assert (<= code #xffff))
(rotatef (ldb (byte 8 0) code) (ldb (byte 8 8) code))
code)
(define-binary-type ucs-2-char-big-endian () (ucs-2-char :swap nil))
(define-binary-type ucs-2-char-little-endian () (ucs-2-char :swap t))
(defun ucs-2-char-type (byte-order-mark)
(ecase byte-order-mark
(#xfeff 'ucs-2-char-big-endian)
(#xfffe 'ucs-2-char-little-endian)))
(define-binary-type ucs-2-string (length)
(:reader (in)
(let ((byte-order-mark (read-value 'u2 in))
(characters (1- (/ length 2))))
(read-value
'generic-string in
:length characters
:character-type (ucs-2-char-type byte-order-mark))))
(:writer (out string)
(write-value 'u2 out #xfeff)
(write-value
'generic-string out string
:length (length string)
:character-type (ucs-2-char-type #xfeff))))
(define-binary-type ucs-2-terminated-string (terminator)
(:reader (in)
(let ((byte-order-mark (read-value 'u2 in)))
(read-value
'generic-terminated-string in
:terminator terminator
:character-type (ucs-2-char-type byte-order-mark))))
(:writer (out string)
(write-value 'u2 out #xfeff)
(write-value
'generic-terminated-string out string
:terminator terminator
:character-type (ucs-2-char-type #xfeff))))