From 15062e438f4fb8277a88b62b59e254ea91c6db38 Mon Sep 17 00:00:00 2001
From: Christian Stadelmann AU0001TC
Date: Tue, 19 Jan 2021 23:47:17 +0100
Subject: [PATCH] Tokenizer: Parse operators one at a time

Prior to this change, any line ending with `[punctuation + '...']`, for
example `||...`, would cause the tokenizer to fail.

Fixes #9
---
 tokenize_code.m | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/tokenize_code.m b/tokenize_code.m
index dd5d628..9506521 100644
--- a/tokenize_code.m
+++ b/tokenize_code.m
@@ -98,19 +98,11 @@
                 add_token('property', symbol);
             % any other operator:
             else
-                symbol = skip(punctuation);
-                % one operator:
-                if any(strcmp(symbol, operators))
-                    add_token('punctuation', symbol);
-                % a binary operator, followed by a unary operator:
-                elseif any(symbol(end) == unary_operators) && ...
-                       any(strcmp(symbol(1:end-1), operators))
-                    add_token('punctuation', symbol(1:end-1));
-                    add_token('punctuation', symbol(end));
+                symbol = skip_from_list(operators);
                 % element-wise transpose operator:
                 % This has to be parsed here, so as to not confuse the `'`
                 % with the beginning of a string.
-                elseif strcmp(symbol, '.') && source_code(pos) == ''''
+                if strcmp(symbol, '.') && source_code(pos) == ''''
                     pos = pos + 1;
                     add_token('punctuation', '.''');
                 % struct access operator such as `.(foo)`:
@@ -118,9 +110,9 @@
                 % classify `.(` as such here.
                 elseif strcmp(symbol, '.') && source_code(pos) == '('
                     add_token('punctuation', '.');
-                % this should never happen:
+                % one operator:
                 else
-                    error(['unknown operator ''' symbol '''']);
+                    add_token('punctuation', symbol);
                 end
             end
         % strings and transpose begin with `'`. The `.'` operator has
@@ -248,6 +240,19 @@ function add_token(token_type, token_text)
         string = source_code(string_start:pos-1);
     end
 
+    function string = skip_from_list(letters_cell)
+        %SKIP_FROM_LIST advances POS while the text skipped so far is a prefix
+        %   of any entry in LETTERS_CELL, and returns the skipped text as STRING.
+        %   this modifies POS!
+
+        string_start = pos;
+        while any(startsWith(letters_cell, source_code(string_start:pos))) ...
+                && pos < length(source_code)
+            pos = pos + 1;
+        end
+        string = source_code(string_start:pos-1);
+    end
+
     function string = skip_line()
         %SKIP_LINE skips to the end of the line and returns the line as STRING
         %   this modifies POS!