Skip to content
60 changes: 55 additions & 5 deletions internal/parser/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ func (l *lexer) lexUntilNewline() (string, int, int) {
// that the next rune to be served by the lexer will be a leading digit.
func (l *lexer) lexNumber() error {
// This function should be understood with reference to the linked image:
// http://www.json.org/number.gif
// https://www.json.org/img/number.png

// Note, we deviate from the json.org documentation as follows:
// There is no reason to lex negative numbers as atomic tokens, it is better to parse them
Expand All @@ -463,13 +463,17 @@ func (l *lexer) lexNumber() error {
numBegin numLexState = iota
numAfterZero
numAfterOneToNine
numAfterIntUnderscore
numAfterDot
numAfterDigit
numAfterFracUnderscore
numAfterE
numAfterExpSign
numAfterExpDigit
numAfterExpUnderscore
)

var cb bytes.Buffer
state := numBegin

outerLoop:
Expand All @@ -492,6 +496,10 @@ outerLoop:
state = numAfterDot
case 'e', 'E':
state = numAfterE
case '_':
return l.makeStaticErrorPoint(
fmt.Sprintf("Couldn't lex number, _ not allowed after leading 0"),
l.location())
default:
break outerLoop
}
Expand All @@ -503,9 +511,21 @@ outerLoop:
state = numAfterE
case r >= '0' && r <= '9':
state = numAfterOneToNine
case r == '_':
state = numAfterIntUnderscore
default:
break outerLoop
}
case numAfterIntUnderscore:
// The only valid transition out of _ is to a digit.
switch {
case r >= '0' && r <= '9':
state = numAfterOneToNine
default:
return l.makeStaticErrorPoint(
fmt.Sprintf("Couldn't lex number, junk after '_': %v", strconv.QuoteRuneToASCII(r)),
l.location())
}
case numAfterDot:
switch {
case r >= '0' && r <= '9':
Expand All @@ -521,9 +541,21 @@ outerLoop:
state = numAfterE
case r >= '0' && r <= '9':
state = numAfterDigit
case r == '_':
state = numAfterFracUnderscore
default:
break outerLoop
}
case numAfterFracUnderscore:
// The only valid transition out of _ is to a digit.
switch {
case r >= '0' && r <= '9':
state = numAfterDigit
default:
return l.makeStaticErrorPoint(
fmt.Sprintf("Couldn't lex number, junk after '_': %v", strconv.QuoteRuneToASCII(r)),
l.location())
}
case numAfterE:
switch {
case r == '+' || r == '-':
Expand All @@ -545,16 +577,35 @@ outerLoop:
}

case numAfterExpDigit:
if r >= '0' && r <= '9' {
switch {
case r >= '0' && r <= '9':
state = numAfterExpDigit
} else {
case r == '_':
state = numAfterExpUnderscore
default:
break outerLoop
}

case numAfterExpUnderscore:
// The only valid transition out of _ is to a digit.
switch {
case r >= '0' && r <= '9':
state = numAfterExpDigit
default:
return l.makeStaticErrorPoint(
fmt.Sprintf("Couldn't lex number, junk after '_': %v", strconv.QuoteRuneToASCII(r)),
l.location())
}
}

if r != '_' {
cb.WriteRune(r)
}
l.next()
}

l.emitToken(tokenNumber)
l.emitFullToken(tokenNumber, cb.String(), "", "")
l.resetTokenStart()
return nil
}

Expand Down Expand Up @@ -978,7 +1029,6 @@ func Lex(diagnosticFilename ast.DiagnosticFileName, importedFilename, input stri
fmt.Sprintf("Could not lex the character %s", strconv.QuoteRuneToASCII(r)),
l.location())
}

}
}

Expand Down
59 changes: 59 additions & 0 deletions internal/parser/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,16 @@ func TestNumber(t *testing.T) {
{kind: tokenOperator, data: "+"},
{kind: tokenNumber, data: "10"},
}},
{"1.2.3.4", "", Tokens{
{kind: tokenNumber, data: "1.2"},
{kind: tokenDot, data: "."},
{kind: tokenNumber, data: "3.4"},
}},
{"1e2.34", "", Tokens{
{kind: tokenNumber, data: "1e2"},
{kind: tokenDot, data: "."},
{kind: tokenNumber, data: "34"},
}},
{"1.+3", "snippet:1:3 Couldn't lex number, junk after decimal point: '+'", Tokens{}},
{"1e!", "snippet:1:3 Couldn't lex number, junk after 'E': '!'", Tokens{}},
{"1e+!", "snippet:1:4 Couldn't lex number, junk after exponent sign: '!'", Tokens{}},
Expand All @@ -268,6 +278,49 @@ func TestNumber(t *testing.T) {
}
}

// TestNumberSeparators checks lexing of numeric literals that contain '_'
// digit separators: valid separators are stripped from the emitted token
// data, while misplaced separators (after a leading zero, doubled, or
// adjacent to '.', 'e', or an exponent sign) produce static errors.
func TestNumberSeparators(t *testing.T) {
	type sepCase struct {
		input  string
		err    string
		tokens Tokens
	}
	cases := []sepCase{
		{"123_456", "", Tokens{{kind: tokenNumber, data: "123456"}}},
		{"1_750_000", "", Tokens{{kind: tokenNumber, data: "1750000"}}},
		{"1_2_3", "", Tokens{{kind: tokenNumber, data: "123"}}},
		{"3.141_592", "", Tokens{{kind: tokenNumber, data: "3.141592"}}},
		{"01_100", "", Tokens{{kind: tokenNumber, data: "0"}, {kind: tokenNumber, data: "1100"}}},
		{"1_200.0", "", Tokens{{kind: tokenNumber, data: "1200.0"}}},
		{"0e1_01", "", Tokens{{kind: tokenNumber, data: "0e101"}}},
		{"10_10e3", "", Tokens{{kind: tokenNumber, data: "1010e3"}}},
		{"2_3e1_2", "", Tokens{{kind: tokenNumber, data: "23e12"}}},
		{"1.1_2e100", "", Tokens{{kind: tokenNumber, data: "1.12e100"}}},
		{"1.1e-10_1", "", Tokens{{kind: tokenNumber, data: "1.1e-101"}}},
		{"9.109_383_56e-31", "", Tokens{{kind: tokenNumber, data: "9.10938356e-31"}}},
		{"1_2.3_4.5_6.7_8", "", Tokens{
			{kind: tokenNumber, data: "12.34"},
			{kind: tokenDot, data: "."},
			{kind: tokenNumber, data: "56.78"},
		}},
		{"1e2_3e4", "", Tokens{
			{kind: tokenNumber, data: "1e23"},
			{kind: tokenIdentifier, data: "e4"},
		}},
		{"0_5", "snippet:1:2 Couldn't lex number, _ not allowed after leading 0", Tokens{}},
		{"123456_!", "snippet:1:8 Couldn't lex number, junk after '_': '!'", Tokens{}},
		{"123__456", "snippet:1:5 Couldn't lex number, junk after '_': '_'", Tokens{}},
		{"1_200_.0", "snippet:1:7 Couldn't lex number, junk after '_': '.'", Tokens{}},
		{"1_200._0", "snippet:1:7 Couldn't lex number, junk after decimal point: '_'", Tokens{}},
		{"1_200_e2", "snippet:1:7 Couldn't lex number, junk after '_': 'e'", Tokens{}},
		{"1_200e_2", "snippet:1:7 Couldn't lex number, junk after 'E': '_'", Tokens{}},
		{"200e-_2", "snippet:1:6 Couldn't lex number, junk after exponent sign: '_'", Tokens{}},
		{"200e+_2", "snippet:1:6 Couldn't lex number, junk after exponent sign: '_'", Tokens{}},
	}
	for _, tc := range cases {
		t.Run(fmt.Sprintf("number %s", tc.input), func(t *testing.T) {
			SingleTest(t, tc.input, tc.err, tc.tokens)
		})
	}
}

func TestDoublestring1(t *testing.T) {
SingleTest(t, "\"hi\"", "", Tokens{
{kind: tokenStringDouble, data: "hi"},
Expand Down Expand Up @@ -445,6 +498,12 @@ func TestIdentifiers(t *testing.T) {
})
}

func TestIdentifierUnderscore(t *testing.T) {
SingleTest(t, "_123", "", Tokens{
{kind: tokenIdentifier, data: "_123"},
})
}

func TestCppComment(t *testing.T) {
SingleTest(t, "// hi", "", Tokens{
{kind: tokenEndOfFile, fodder: ast.Fodder{{Kind: ast.FodderParagraph, Comment: []string{"// hi"}}}},
Expand Down
15 changes: 15 additions & 0 deletions testdata/digitsep.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"test_results": [
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true
]
}
19 changes: 19 additions & 0 deletions testdata/digitsep.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Each case pairs a numeric literal written with '_' digit separators
// against the same literal spelled as a string.
local cases = [
[123456, '123_456'],
[1750000, '1_750_000'],
[123, '1_2_3'],
[3.141592, '3.141_592'],
[1200.0, '1_200.0'],
[0e101, '0e1_01'],
[1010e3, '10_10e3'],
[23e12, '2_3e1_2'],
[1.12e100, '1.1_2e100'],
[1.1e-101, '1.1e-10_1'],
[9.10938356e-31, '9.109_383_56e-31'],
];

// Strip the separators from the string form and parse it as JSON,
// producing the reference value for comparison.
local sepParse(s) = std.parseJson(std.strReplace(s, '_', ''));

{
// Assert that each natively-lexed literal equals its separator-stripped
// string parse; every entry should come out true (see digitsep.golden).
test_results: [std.assertEqual(c[0], sepParse(c[1])) for c in cases],
}
Empty file added testdata/digitsep.linter.golden
Empty file.