From de8da6e0edcb385ac34940e8712e1f58cbca2b08 Mon Sep 17 00:00:00 2001 From: Luke Francl Date: Tue, 10 Feb 2026 13:33:05 -0800 Subject: [PATCH] fix: crash when parsing heredocs with identifiers >= 256 chars The heredoc word length was serialized as a single byte, which silently truncated the stored length to its low 8 bits for identifiers of >= 256 characters, even though the full identifier was still written. The deserializer would then read the wrong length, leaving unread bytes in the buffer, and hit the assert(size == length) check. --- src/scanner.c | 9 ++++++--- test/corpus/literals.txt | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 5fb4f976..d5835856 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -107,13 +107,14 @@ static inline unsigned serialize(Scanner *scanner, char *buffer) { buffer[size++] = (char)scanner->open_heredocs.size; for (uint32_t i = 0; i < scanner->open_heredocs.size; i++) { Heredoc *heredoc = array_get(&scanner->open_heredocs, i); - if (size + 2 + heredoc->word.size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { + if (size + 3 + sizeof(uint32_t) + heredoc->word.size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { return 0; } buffer[size++] = (char)heredoc->end_word_indentation_allowed; buffer[size++] = (char)heredoc->allows_interpolation; buffer[size++] = (char)heredoc->started; - buffer[size++] = (char)heredoc->word.size; + memcpy(&buffer[size], &heredoc->word.size, sizeof(uint32_t)); + size += sizeof(uint32_t); memcpy(&buffer[size], heredoc->word.contents, heredoc->word.size); size += heredoc->word.size; } @@ -149,7 +150,9 @@ static inline void deserialize(Scanner *scanner, const char *buffer, unsigned le heredoc.started = buffer[size++]; heredoc.word = (String)array_new(); - uint8_t word_length = buffer[size++]; + uint32_t word_length; + memcpy(&word_length, &buffer[size], sizeof(uint32_t)); + size += sizeof(uint32_t); array_reserve(&heredoc.word, word_length); memcpy(heredoc.word.contents, &buffer[size], word_length); heredoc.word.size = word_length; diff 
--git a/test/corpus/literals.txt b/test/corpus/literals.txt index 95efd9f6..b4f59a34 100644 --- a/test/corpus/literals.txt +++ b/test/corpus/literals.txt @@ -1049,6 +1049,22 @@ heredoc content (heredoc_content) (heredoc_end))) +======================================== +heredoc with long identifier (>255 chars) +======================================== + +<<~AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +content +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + +--- + +(program + (heredoc_beginning) + (heredoc_body + (heredoc_content) + (heredoc_end))) + ======================================== heredoc with interspersed end word ========================================