diff --git a/CLAUDE.md b/CLAUDE.md index 580f785..b075068 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -132,6 +132,12 @@ Six packages, one pipeline: | `[arr FOR m]` | `arr[0 : m]` (shorthand slice, FROM 0 implied) | | `[arr FROM n FOR m] := src` | `copy(arr[n:n+m], src)` (slice assignment) | | Nested `PROC`/`FUNCTION` | `name := func(...) { ... }` (Go closure) | +| `VAL x IS 42:` (untyped) | `var x = 42` (Go type inference) | +| `[1, 2, 3]` (array literal) | `[]int{1, 2, 3}` | +| `VAL INT X RETYPES X :` | `X := int(int32(math.Float32bits(float32(X))))` | +| `VAL [2]INT X RETYPES X :` | `X := []int{lo, hi}` via `math.Float64bits` | +| `CAUSEERROR()` | `panic("CAUSEERROR")` | +| `LONGPROD` / `LONGDIV` etc. | Go helper functions using `uint64`/`math/bits` | ## Key Parser Patterns @@ -164,15 +170,15 @@ Typical workflow for a new language construct: ## What's Implemented -Preprocessor (`#IF`/`#ELSE`/`#ENDIF`/`#DEFINE`/`#INCLUDE` with search paths, include guards, include-once deduplication, `#COMMENT`/`#PRAGMA`/`#USE` ignored), module file generation from SConscript (`gen-module` subcommand), SEQ, PAR, IF, WHILE, CASE, ALT (with guards, timer timeouts, and multi-statement bodies with scoped declarations), SKIP, STOP, variable/array/channel/timer declarations, abbreviations (`VAL INT x IS 42:`, `INT y IS z:`, `VAL []BYTE s IS "hi":`), assignments (simple and indexed), channel send/receive, channel arrays (`[n]CHAN OF TYPE` with indexed send/receive and `[]CHAN OF TYPE` proc params), PROC (with VAL, RESULT, reference, CHAN, []CHAN, open array `[]TYPE`, fixed-size array `[n]TYPE`, and shared-type params), channel direction restrictions (`CHAN OF INT c?` → `<-chan int`, `CHAN OF INT c!` → `chan<- int`, call-site annotations `out!`/`in?` accepted), multi-line parameter lists (lexer suppresses INDENT/DEDENT/NEWLINE inside parens), FUNCTION (IS and VALOF forms with multi-statement bodies, including multi-result `INT, INT FUNCTION` with `RESULT a, b`), multi-assignment (`a, b := 
func(...)` including indexed targets like `x[0], x[1] := x[1], x[0]`), KRoC-style colon terminators on PROC/FUNCTION (optional), replicators on SEQ/PAR/IF (with optional STEP), arithmetic/comparison/logical/AFTER/bitwise operators, type conversions (`INT expr`, `BYTE expr`, `REAL32 expr`, `REAL64 expr`, etc.), REAL32/REAL64 types, hex integer literals (`#FF`, `#80000000`), string literals, byte literals (`'A'`, `'*n'` with occam escape sequences), built-in print procedures, protocols (simple, sequential, and variant), record types (with field access via bracket syntax), SIZE operator, array slices (`[arr FROM n FOR m]` and shorthand `[arr FOR m]` with slice assignment), nested PROCs/FUNCTIONs (local definitions as Go closures), MOSTNEG/MOSTPOS (type min/max constants for INT, BYTE, REAL32, REAL64), INITIAL declarations (`INITIAL INT x IS 42:` — mutable variable with initial value), checked (modular) arithmetic (`PLUS`, `MINUS`, `TIMES` — wrapping operators). +Preprocessor (`#IF`/`#ELSE`/`#ENDIF`/`#DEFINE`/`#INCLUDE` with search paths, include guards, include-once deduplication, `#COMMENT`/`#PRAGMA`/`#USE` ignored), module file generation from SConscript (`gen-module` subcommand), SEQ, PAR, IF, WHILE, CASE, ALT (with guards, timer timeouts, and multi-statement bodies with scoped declarations), SKIP, STOP, variable/array/channel/timer declarations, abbreviations (`VAL INT x IS 42:`, `INT y IS z:`, `VAL []BYTE s IS "hi":`, untyped `VAL x IS expr:`), assignments (simple and indexed), channel send/receive, channel arrays (`[n]CHAN OF TYPE` with indexed send/receive and `[]CHAN OF TYPE` proc params), PROC (with VAL, RESULT, reference, CHAN, []CHAN, open array `[]TYPE`, fixed-size array `[n]TYPE`, and shared-type params), channel direction restrictions (`CHAN OF INT c?` → `<-chan int`, `CHAN OF INT c!` → `chan<- int`, call-site annotations `out!`/`in?` accepted), multi-line parameter lists and expressions (lexer suppresses INDENT/DEDENT/NEWLINE inside parens/brackets and 
after continuation operators), FUNCTION (IS and VALOF forms with multi-statement bodies, including multi-result `INT, INT FUNCTION` with `RESULT a, b`), multi-assignment (`a, b := func(...)` including indexed targets like `x[0], x[1] := x[1], x[0]`), KRoC-style colon terminators on PROC/FUNCTION (optional), replicators on SEQ/PAR/IF (with optional STEP), arithmetic/comparison/logical/AFTER/bitwise operators, type conversions (`INT expr`, `BYTE expr`, `REAL32 expr`, `REAL64 expr`, etc.), REAL32/REAL64 types, hex integer literals (`#FF`, `#80000000`), string literals, byte literals (`'A'`, `'*n'` with occam escape sequences), built-in print procedures, protocols (simple, sequential, and variant), record types (with field access via bracket syntax), SIZE operator, array slices (`[arr FROM n FOR m]` and shorthand `[arr FOR m]` with slice assignment), array literals (`[1, 2, 3]`), nested PROCs/FUNCTIONs (local definitions as Go closures), MOSTNEG/MOSTPOS (type min/max constants for INT, BYTE, REAL32, REAL64), INITIAL declarations (`INITIAL INT x IS 42:` — mutable variable with initial value), checked (modular) arithmetic (`PLUS`, `MINUS`, `TIMES` — wrapping operators), RETYPES (bit-level type reinterpretation: `VAL INT X RETYPES X :` for float32→int, `VAL [2]INT X RETYPES X :` for float64→int pair), transputer intrinsics (LONGPROD, LONGDIV, LONGSUM, LONGDIFF, NORMALISE, SHIFTRIGHT, SHIFTLEFT — implemented as Go helper functions), CAUSEERROR (maps to `panic("CAUSEERROR")`). ## Course Module Testing -The KRoC course module (`kroc/modules/course/libsrc/course.module`) is a real-world integration test. 
A reduced version excluding `float_io.occ` is provided: +The KRoC course module (`kroc/modules/course/libsrc/course.module`) is a real-world integration test: ```bash -# Transpile course module (without float_io.occ) -./occam2go -I kroc/modules/course/libsrc -D TARGET.BITS.PER.WORD=32 -o /tmp/course_out.go course_nofloat.module +# Transpile full course module (including float_io.occ) +./occam2go -I kroc/modules/course/libsrc -D TARGET.BITS.PER.WORD=32 -o /tmp/course_out.go kroc/modules/course/libsrc/course.module # Verify Go output compiles (will only fail with "no main" since it's a library) go vet /tmp/course_out.go @@ -180,4 +186,4 @@ go vet /tmp/course_out.go ## Not Yet Implemented -RETYPES (bit-level type reinterpretation), transputer intrinsics (LONGPROD, LONGDIV, LONGSUM, LONGDIFF, NORMALISE, SHIFTRIGHT, SHIFTLEFT), CAUSEERROR, PRI ALT/PRI PAR, PLACED PAR, PORT OF. These are needed to transpile `float_io.occ` (Phase 2). See `TODO.md` for the full list with priorities. +PRI ALT/PRI PAR, PLACED PAR, PORT OF. See `TODO.md` for the full list with priorities. 
diff --git a/TODO.md b/TODO.md index 881e129..15ad2f8 100644 --- a/TODO.md +++ b/TODO.md @@ -20,7 +20,7 @@ - **Channel arrays** — `[n]CHAN OF TYPE cs:` with indexed send/receive and `[]CHAN OF TYPE` proc params - **Channel direction** — `CHAN OF INT c?` (receive-only) and `CHAN OF INT c!` (send-only); direction annotations at call sites (`out!`, `in?`) accepted and ignored - **Timers** — `TIMER tim:` with reads and `AFTER` expressions -- **Abbreviations** — `VAL INT x IS 1:`, `INT y IS z:` — named constants and aliases +- **Abbreviations** — `VAL INT x IS 1:`, `INT y IS z:`, untyped `VAL x IS expr:` — named constants and aliases - **INITIAL declarations** — `INITIAL INT x IS 42:` — mutable variables with initial values - **Byte literals** — `'A'`, `'0'` with occam escape sequences (`*n`, `*c`, `*t`) - **Hex integer literals** — `#FF`, `#80000000` @@ -49,7 +49,9 @@ - **MOSTNEG/MOSTPOS** — Type min/max constants for INT, BYTE, REAL32, REAL64 - **SIZE operator** — `SIZE arr`, `SIZE "str"` maps to `len()` - **Array slices** — `[arr FROM n FOR m]` with slice assignment +- **Array literals** — `[1, 2, 3]` — inline array/table expressions - **Multi-assignment** — `a, b := f(...)` including indexed targets like `x[0], x[1] := x[1], x[0]` +- **Multi-line expression continuation** — Binary operators and `:=` at end of line continue expression on next line ### Protocols - **Simple** — `PROTOCOL SIG IS INT` (type alias) @@ -59,6 +61,11 @@ ### Records - **RECORD** — Struct types with field access via bracket syntax (`p[x]`) +### Type Reinterpretation & Intrinsics +- **RETYPES** — Bit-level type reinterpretation (`VAL INT X RETYPES X :` for float32→int, `VAL [2]INT X RETYPES X :` for float64→int pair) +- **Transputer intrinsics** — `LONGPROD`, `LONGDIV`, `LONGSUM`, `LONGDIFF`, `NORMALISE`, `SHIFTLEFT`, `SHIFTRIGHT` — extended-precision arithmetic as Go helper functions +- **CAUSEERROR** — Error-raising primitive, maps to `panic("CAUSEERROR")` + ### Preprocessor - **`#IF` / 
`#ELSE` / `#ENDIF`** — Conditional compilation with `TRUE`, `FALSE`, `DEFINED()`, `NOT`, equality - **`#DEFINE`** — Symbol definition @@ -89,8 +96,5 @@ | **PRI ALT / PRI PAR** | Priority variants of ALT and PAR. | | **PLACED PAR** | Assigning processes to specific hardware. | | **PORT OF** | Hardware port mapping. | -| **`RETYPES`** | Type punning / reinterpret cast (`VAL INT X RETYPES X :`). Used in float_io.occ. | -| **`CAUSEERROR ()`** | Built-in error-raising primitive. Used in float_io.occ. | -| **Transputer intrinsics** | `LONGPROD`, `LONGDIV`, `LONGSUM`, `LONGDIFF`, `NORMALISE`, `SHIFTLEFT`, `SHIFTRIGHT`. Used in float_io.occ. | | **`VAL []BYTE` abbreviations** | `VAL []BYTE cmap IS "0123456789ABCDEF":` — named string constants. | | **`#PRAGMA DEFINED`** | Compiler hint to suppress definedness warnings. Can be ignored. | diff --git a/ast/ast.go b/ast/ast.go index 8f5c269..894ad1c 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -494,3 +494,27 @@ type Abbreviation struct { func (a *Abbreviation) statementNode() {} func (a *Abbreviation) TokenLiteral() string { return a.Token.Literal } + +// ArrayLiteral represents an array literal expression: [expr1, expr2, ...] +type ArrayLiteral struct { + Token lexer.Token // the [ token + Elements []Expression // the elements +} + +func (al *ArrayLiteral) expressionNode() {} +func (al *ArrayLiteral) TokenLiteral() string { return al.Token.Literal } + +// RetypesDecl represents a RETYPES declaration: +// VAL INT X RETYPES X : or VAL [2]INT X RETYPES X : +type RetypesDecl struct { + Token lexer.Token // the VAL token + IsVal bool // always true for now (VAL ... RETYPES ...) + TargetType string // "INT", "REAL32", etc. 
+ IsArray bool // true for [n]TYPE + ArraySize Expression // array size when IsArray + Name string // target variable name + Source string // source variable name +} + +func (r *RetypesDecl) statementNode() {} +func (r *RetypesDecl) TokenLiteral() string { return r.Token.Literal } diff --git a/codegen/codegen.go b/codegen/codegen.go index 5cd8626..e994e39 100644 --- a/codegen/codegen.go +++ b/codegen/codegen.go @@ -16,6 +16,7 @@ type Generator struct { needTime bool // track if we need time package import needOs bool // track if we need os package import needMath bool // track if we need math package import + needMathBits bool // track if we need math/bits package import // Track procedure signatures for proper pointer handling procSigs map[string][]ast.ProcParam @@ -33,6 +34,22 @@ type Generator struct { // Nesting level: 0 = package level, >0 = inside a function nestingLevel int + + // RETYPES parameter renames: when a RETYPES declaration shadows a + // parameter (e.g. VAL INT X RETYPES X :), the parameter is renamed + // in the signature so := can create a new variable with the original name. + retypesRenames map[string]string +} + +// Transputer intrinsic function names +var transpIntrinsics = map[string]bool{ + "LONGPROD": true, + "LONGDIV": true, + "LONGSUM": true, + "LONGDIFF": true, + "NORMALISE": true, + "SHIFTRIGHT": true, + "SHIFTLEFT": true, } // Built-in print procedures @@ -50,8 +67,21 @@ func New() *Generator { // goIdent converts an occam identifier to a valid Go identifier. // Occam allows dots in identifiers (e.g., out.repeat); Go does not. +// goReserved is a set of Go keywords and predeclared identifiers that cannot be +// used as variable names when they also appear as type conversions in the generated code. 
var goReserved = map[string]bool{
	// Predeclared identifiers. Shadowing them is legal Go, but it breaks the
	// conversions and builtin calls that the generated code emits (the RETYPES
	// lowering, for example, emits int32(...), float32(...) and float64(...)).
	"byte": true, "int": true, "string": true, "len": true, "cap": true,
	"make": true, "new": true, "copy": true, "close": true, "delete": true,
	"panic": true, "recover": true, "print": true, "println": true,
	"error": true, "rune": true, "bool": true, "true": true, "false": true,
	"int32": true, "uint32": true, "float32": true, "float64": true,

	// Go keywords. These can never be used as identifiers, so a lower-case
	// occam name such as `range` or `type` would otherwise produce Go source
	// that does not even parse.
	"break": true, "case": true, "chan": true, "const": true, "continue": true,
	"default": true, "defer": true, "else": true, "fallthrough": true, "for": true,
	"func": true, "go": true, "goto": true, "if": true, "import": true,
	"interface": true, "map": true, "package": true, "range": true, "return": true,
	"select": true, "struct": true, "switch": true, "type": true, "var": true,
}

// goIdent converts an occam identifier to a valid Go identifier.
// Occam allows dots in identifiers (e.g., out.repeat); Go does not, so every
// dot becomes an underscore. If the result is listed in goReserved it is
// returned with a leading underscore so it cannot clash with a Go keyword or
// with a predeclared name used by the generated code.
func goIdent(name string) string {
	ident := strings.ReplaceAll(name, ".", "_")
	if goReserved[ident] {
		return "_" + ident
	}
	return ident
}
other statements var typeDecls []ast.Statement var procDecls []ast.Statement @@ -153,7 +198,7 @@ func (g *Generator) Generate(program *ast.Program) string { var abbrDecls []ast.Statement for _, stmt := range program.Statements { - switch stmt.(type) { + switch s := stmt.(type) { case *ast.ProtocolDecl, *ast.RecordDecl: typeDecls = append(typeDecls, stmt) case *ast.ProcDecl, *ast.FuncDecl: @@ -166,6 +211,10 @@ func (g *Generator) Generate(program *ast.Program) string { } else { mainStatements = append(mainStatements, stmt) } + case *ast.RetypesDecl: + _ = s + // RETYPES declarations are local to functions, not package-level + mainStatements = append(mainStatements, stmt) default: mainStatements = append(mainStatements, stmt) } @@ -179,14 +228,22 @@ func (g *Generator) Generate(program *ast.Program) string { // Generate package-level abbreviations (constants) for _, stmt := range abbrDecls { abbr := stmt.(*ast.Abbreviation) - goType := g.occamTypeToGo(abbr.Type) - if abbr.IsOpenArray { - goType = "[]" + goType + if abbr.Type == "" { + // Untyped VAL: let Go infer the type + g.builder.WriteString("var ") + g.write(fmt.Sprintf("%s = ", goIdent(abbr.Name))) + g.generateExpression(abbr.Value) + g.write("\n") + } else { + goType := g.occamTypeToGo(abbr.Type) + if abbr.IsOpenArray { + goType = "[]" + goType + } + g.builder.WriteString("var ") + g.write(fmt.Sprintf("%s %s = ", goIdent(abbr.Name), goType)) + g.generateExpression(abbr.Value) + g.write("\n") } - g.builder.WriteString("var ") - g.write(fmt.Sprintf("%s %s = ", goIdent(abbr.Name), goType)) - g.generateExpression(abbr.Value) - g.write("\n") } if len(abbrDecls) > 0 { g.writeLine("") @@ -217,12 +274,64 @@ func (g *Generator) Generate(program *ast.Program) string { // from nested declarations inside PROC bodies. 
func (g *Generator) collectNestedProcSigs(stmts []ast.Statement) { for _, stmt := range stmts { - if proc, ok := stmt.(*ast.ProcDecl); ok { - g.procSigs[proc.Name] = proc.Params - g.collectNestedProcSigs(proc.Body) + switch s := stmt.(type) { + case *ast.ProcDecl: + g.procSigs[s.Name] = s.Params + g.collectNestedProcSigs(s.Body) + case *ast.FuncDecl: + g.procSigs[s.Name] = s.Params + g.collectNestedProcSigs(s.Body) + case *ast.SeqBlock: + g.collectNestedProcSigs(s.Statements) + case *ast.ParBlock: + g.collectNestedProcSigs(s.Statements) + case *ast.IfStatement: + for _, c := range s.Choices { + g.collectNestedProcSigs(c.Body) + } + case *ast.WhileLoop: + g.collectNestedProcSigs(s.Body) + case *ast.CaseStatement: + for _, ch := range s.Choices { + g.collectNestedProcSigs(ch.Body) + } } - if fn, ok := stmt.(*ast.FuncDecl); ok { - g.procSigs[fn.Name] = fn.Params + } +} + +// collectNestedProcSigsScoped registers nested proc/func signatures into procSigs +// for the current scope. It saves old values into oldSigs so they can be restored +// after the scope ends (preventing name collisions between same-named nested procs +// in different parent procs). 
+func (g *Generator) collectNestedProcSigsScoped(stmts []ast.Statement, oldSigs map[string][]ast.ProcParam) { + for _, stmt := range stmts { + switch s := stmt.(type) { + case *ast.ProcDecl: + if _, saved := oldSigs[s.Name]; !saved { + oldSigs[s.Name] = g.procSigs[s.Name] // nil if not previously set + } + g.procSigs[s.Name] = s.Params + g.collectNestedProcSigsScoped(s.Body, oldSigs) + case *ast.FuncDecl: + if _, saved := oldSigs[s.Name]; !saved { + oldSigs[s.Name] = g.procSigs[s.Name] + } + g.procSigs[s.Name] = s.Params + g.collectNestedProcSigsScoped(s.Body, oldSigs) + case *ast.SeqBlock: + g.collectNestedProcSigsScoped(s.Statements, oldSigs) + case *ast.ParBlock: + g.collectNestedProcSigsScoped(s.Statements, oldSigs) + case *ast.IfStatement: + for _, c := range s.Choices { + g.collectNestedProcSigsScoped(c.Body, oldSigs) + } + case *ast.WhileLoop: + g.collectNestedProcSigsScoped(s.Body, oldSigs) + case *ast.CaseStatement: + for _, ch := range s.Choices { + g.collectNestedProcSigsScoped(ch.Body, oldSigs) + } } } } @@ -659,6 +768,12 @@ func (g *Generator) exprNeedsMath(expr ast.Expression) bool { } case *ast.SliceExpr: return g.exprNeedsMath(e.Array) || g.exprNeedsMath(e.Start) || g.exprNeedsMath(e.Length) + case *ast.ArrayLiteral: + for _, elem := range e.Elements { + if g.exprNeedsMath(elem) { + return true + } + } } return false } @@ -757,6 +872,8 @@ func (g *Generator) generateStatement(stmt ast.Statement) { g.generateAbbreviation(s) case *ast.MultiAssignment: g.generateMultiAssignment(s) + case *ast.RetypesDecl: + g.generateRetypesDecl(s) } } @@ -778,6 +895,10 @@ func (g *Generator) generateAbbreviation(abbr *ast.Abbreviation) { g.write(fmt.Sprintf("%s := ", goIdent(abbr.Name))) g.generateExpression(abbr.Value) g.write("\n") + // Suppress "declared and not used" for abbreviations inside function bodies + if g.nestingLevel > 0 { + g.writeLine(fmt.Sprintf("_ = %s", goIdent(abbr.Name))) + } } func (g *Generator) generateChanDecl(decl *ast.ChanDecl) { @@ -1410,7 
+1531,7 @@ func (g *Generator) generateProcDecl(proc *ast.ProcDecl) { } } for _, p := range proc.Params { - if !p.IsVal && !p.IsChan && !p.IsChanArray && !p.IsOpenArray { + if !p.IsVal && !p.IsChan && !p.IsChanArray && !p.IsOpenArray && p.ArraySize == "" { newRefParams[p.Name] = true } else { // Own param shadows any inherited ref param with same name @@ -1431,6 +1552,26 @@ func (g *Generator) generateProcDecl(proc *ast.ProcDecl) { } g.refParams = newRefParams + // Scan proc body for RETYPES declarations that shadow parameters. + // When VAL INT X RETYPES X :, Go can't redeclare X in the same scope, + // so we rename the parameter (e.g. X → _rp_X) and let RETYPES declare the original name. + oldRenames := g.retypesRenames + g.retypesRenames = nil + paramNames := make(map[string]bool) + for _, p := range proc.Params { + paramNames[p.Name] = true + } + for _, stmt := range proc.Body { + if rd, ok := stmt.(*ast.RetypesDecl); ok { + if paramNames[rd.Source] && rd.Name == rd.Source { + if g.retypesRenames == nil { + g.retypesRenames = make(map[string]string) + } + g.retypesRenames[rd.Name] = "_rp_" + goIdent(rd.Name) + } + } + } + // Generate function signature params := g.generateProcParams(proc.Params) gName := goIdent(proc.Name) @@ -1443,10 +1584,25 @@ func (g *Generator) generateProcDecl(proc *ast.ProcDecl) { g.indent++ g.nestingLevel++ + // Register nested proc/func signatures for this scope so that calls + // within this proc resolve to the correct (local) signature rather than + // a same-named proc from a different scope. 
+ oldSigs := make(map[string][]ast.ProcParam) + g.collectNestedProcSigsScoped(proc.Body, oldSigs) + for _, stmt := range proc.Body { g.generateStatement(stmt) } + // Restore overwritten signatures + for name, params := range oldSigs { + if params == nil { + delete(g.procSigs, name) + } else { + g.procSigs[name] = params + } + } + g.nestingLevel-- g.indent-- g.writeLine("}") @@ -1454,6 +1610,7 @@ func (g *Generator) generateProcDecl(proc *ast.ProcDecl) { // Restore previous context g.refParams = oldRefParams + g.retypesRenames = oldRenames } func (g *Generator) generateProcParams(params []ast.ProcParam) string { @@ -1467,11 +1624,9 @@ func (g *Generator) generateProcParams(params []ast.ProcParam) string { } else if p.IsOpenArray { goType = "[]" + g.occamTypeToGo(p.Type) } else if p.ArraySize != "" { - // Fixed-size array parameter: [n]TYPE - goType = "[" + p.ArraySize + "]" + g.occamTypeToGo(p.Type) - if !p.IsVal { - goType = "*" + goType - } + // Fixed-size array parameter: use slice for Go compatibility + // (occam [n]TYPE and []TYPE both map to Go slices) + goType = "[]" + g.occamTypeToGo(p.Type) } else { goType = g.occamTypeToGo(p.Type) if !p.IsVal { @@ -1479,7 +1634,11 @@ func (g *Generator) generateProcParams(params []ast.ProcParam) string { goType = "*" + goType } } - parts = append(parts, fmt.Sprintf("%s %s", goIdent(p.Name), goType)) + pName := goIdent(p.Name) + if renamed, ok := g.retypesRenames[p.Name]; ok { + pName = renamed + } + parts = append(parts, fmt.Sprintf("%s %s", pName, goType)) } return strings.Join(parts, ", ") } @@ -1502,6 +1661,12 @@ func (g *Generator) generateProcCall(call *ast.ProcCall) { return } + // Handle CAUSEERROR + if call.Name == "CAUSEERROR" { + g.writeLine(`panic("CAUSEERROR")`) + return + } + g.builder.WriteString(strings.Repeat("\t", g.indent)) g.write(goIdent(call.Name)) g.write("(") @@ -1514,7 +1679,7 @@ func (g *Generator) generateProcCall(call *ast.ProcCall) { g.write(", ") } // If this parameter is not VAL (i.e., pass by 
reference), take address - // Channels and channel arrays are already reference types, so no & needed + // Channels, channel arrays, open arrays, and fixed-size arrays (mapped to slices) are already reference types if i < len(params) && !params[i].IsVal && !params[i].IsChan && !params[i].IsChanArray && !params[i].IsOpenArray && params[i].ArraySize == "" { g.write("&") } @@ -1579,7 +1744,11 @@ func (g *Generator) generateFuncDecl(fn *ast.FuncDecl) { } func (g *Generator) generateFuncCallExpr(call *ast.FuncCall) { - g.write(goIdent(call.Name)) + if transpIntrinsics[call.Name] { + g.write("_" + call.Name) + } else { + g.write(goIdent(call.Name)) + } g.write("(") params := g.procSigs[call.Name] for i, arg := range call.Args { @@ -1943,6 +2112,8 @@ func (g *Generator) generateExpression(expr ast.Expression) { g.write(")") case *ast.MostExpr: g.generateMostExpr(e) + case *ast.ArrayLiteral: + g.generateArrayLiteral(e) } } @@ -2005,3 +2176,305 @@ func (g *Generator) occamOpToGo(op string) string { return op // +, -, *, /, <, >, <=, >= are the same } } + +// generateArrayLiteral emits a Go slice literal: []int{e1, e2, ...} +func (g *Generator) generateArrayLiteral(al *ast.ArrayLiteral) { + g.write("[]int{") + for i, elem := range al.Elements { + if i > 0 { + g.write(", ") + } + g.generateExpression(elem) + } + g.write("}") +} + +// generateRetypesDecl emits code for a RETYPES declaration. +// VAL INT X RETYPES X : — reinterpret float32/64 bits as int(s) +// When source and target share the same name (shadowing a parameter), the parameter +// has been renamed in the signature (e.g. X → _rp_X) so we can use := with the +// original name to create a new variable. 
+func (g *Generator) generateRetypesDecl(r *ast.RetypesDecl) { + gName := goIdent(r.Name) + gSource := goIdent(r.Source) + // If the parameter was renamed for RETYPES shadowing, use the renamed source + if renamed, ok := g.retypesRenames[r.Source]; ok { + gSource = renamed + } + if r.IsArray { + // VAL [2]INT X RETYPES X : — split float64 into two int32 words + tmpVar := fmt.Sprintf("_retmp%d", g.tmpCounter) + g.tmpCounter++ + g.writeLine(fmt.Sprintf("%s := math.Float64bits(float64(%s))", tmpVar, gSource)) + g.writeLine(fmt.Sprintf("%s := []int{int(int32(uint32(%s))), int(int32(uint32(%s >> 32)))}", gName, tmpVar, tmpVar)) + } else { + // VAL INT X RETYPES X : — reinterpret float32 as int + g.writeLine(fmt.Sprintf("%s := int(int32(math.Float32bits(float32(%s))))", gName, gSource)) + } +} + +// containsIntrinsics checks if a statement tree contains transputer intrinsic calls. +func (g *Generator) containsIntrinsics(stmt ast.Statement) bool { + return g.walkStatements(stmt, func(e ast.Expression) bool { + if fc, ok := e.(*ast.FuncCall); ok { + return transpIntrinsics[fc.Name] + } + return false + }) +} + +// containsRetypes checks if a statement tree contains RETYPES declarations. 
+func (g *Generator) containsRetypes(stmt ast.Statement) bool { + switch s := stmt.(type) { + case *ast.RetypesDecl: + return true + case *ast.SeqBlock: + for _, inner := range s.Statements { + if g.containsRetypes(inner) { + return true + } + } + case *ast.ParBlock: + for _, inner := range s.Statements { + if g.containsRetypes(inner) { + return true + } + } + case *ast.ProcDecl: + for _, inner := range s.Body { + if g.containsRetypes(inner) { + return true + } + } + case *ast.FuncDecl: + for _, inner := range s.Body { + if g.containsRetypes(inner) { + return true + } + } + case *ast.WhileLoop: + for _, inner := range s.Body { + if g.containsRetypes(inner) { + return true + } + } + case *ast.IfStatement: + for _, choice := range s.Choices { + if choice.NestedIf != nil && g.containsRetypes(choice.NestedIf) { + return true + } + for _, inner := range choice.Body { + if g.containsRetypes(inner) { + return true + } + } + } + case *ast.CaseStatement: + for _, choice := range s.Choices { + for _, inner := range choice.Body { + if g.containsRetypes(inner) { + return true + } + } + } + } + return false +} + +// walkStatements recursively walks a statement tree, applying fn to all expressions. +// Returns true if fn returns true for any expression. 
+func (g *Generator) walkStatements(stmt ast.Statement, fn func(ast.Expression) bool) bool { + switch s := stmt.(type) { + case *ast.Assignment: + return g.walkExpr(s.Value, fn) || g.walkExpr(s.Index, fn) + case *ast.MultiAssignment: + for _, v := range s.Values { + if g.walkExpr(v, fn) { + return true + } + } + case *ast.Abbreviation: + return g.walkExpr(s.Value, fn) + case *ast.SeqBlock: + for _, inner := range s.Statements { + if g.walkStatements(inner, fn) { + return true + } + } + case *ast.ParBlock: + for _, inner := range s.Statements { + if g.walkStatements(inner, fn) { + return true + } + } + case *ast.ProcDecl: + for _, inner := range s.Body { + if g.walkStatements(inner, fn) { + return true + } + } + case *ast.FuncDecl: + for _, inner := range s.Body { + if g.walkStatements(inner, fn) { + return true + } + } + case *ast.WhileLoop: + if g.walkExpr(s.Condition, fn) { + return true + } + for _, inner := range s.Body { + if g.walkStatements(inner, fn) { + return true + } + } + case *ast.IfStatement: + for _, choice := range s.Choices { + if choice.NestedIf != nil && g.walkStatements(choice.NestedIf, fn) { + return true + } + if g.walkExpr(choice.Condition, fn) { + return true + } + for _, inner := range choice.Body { + if g.walkStatements(inner, fn) { + return true + } + } + } + case *ast.CaseStatement: + if g.walkExpr(s.Selector, fn) { + return true + } + for _, choice := range s.Choices { + for _, v := range choice.Values { + if g.walkExpr(v, fn) { + return true + } + } + for _, inner := range choice.Body { + if g.walkStatements(inner, fn) { + return true + } + } + } + case *ast.Send: + if g.walkExpr(s.Value, fn) { + return true + } + for _, v := range s.Values { + if g.walkExpr(v, fn) { + return true + } + } + case *ast.ProcCall: + for _, arg := range s.Args { + if g.walkExpr(arg, fn) { + return true + } + } + case *ast.AltBlock: + for _, c := range s.Cases { + for _, inner := range c.Body { + if g.walkStatements(inner, fn) { + return true + } + } + } + 
case *ast.VariantReceive: + for _, c := range s.Cases { + if c.Body != nil && g.walkStatements(c.Body, fn) { + return true + } + } + } + return false +} + +// walkExpr recursively walks an expression tree, applying fn. +func (g *Generator) walkExpr(expr ast.Expression, fn func(ast.Expression) bool) bool { + if expr == nil { + return false + } + if fn(expr) { + return true + } + switch e := expr.(type) { + case *ast.BinaryExpr: + return g.walkExpr(e.Left, fn) || g.walkExpr(e.Right, fn) + case *ast.UnaryExpr: + return g.walkExpr(e.Right, fn) + case *ast.ParenExpr: + return g.walkExpr(e.Expr, fn) + case *ast.TypeConversion: + return g.walkExpr(e.Expr, fn) + case *ast.SizeExpr: + return g.walkExpr(e.Expr, fn) + case *ast.IndexExpr: + return g.walkExpr(e.Left, fn) || g.walkExpr(e.Index, fn) + case *ast.FuncCall: + for _, arg := range e.Args { + if g.walkExpr(arg, fn) { + return true + } + } + case *ast.SliceExpr: + return g.walkExpr(e.Array, fn) || g.walkExpr(e.Start, fn) || g.walkExpr(e.Length, fn) + case *ast.ArrayLiteral: + for _, elem := range e.Elements { + if g.walkExpr(elem, fn) { + return true + } + } + } + return false +} + +// emitIntrinsicHelpers writes the Go helper functions for transputer intrinsics. +// These implement 32-bit transputer semantics using uint32/uint64 arithmetic. 
+func (g *Generator) emitIntrinsicHelpers() { + g.writeLine("// Transputer intrinsic helper functions") + g.writeLine("func _LONGPROD(a, b, c int) (int, int) {") + g.writeLine("\tr := uint64(uint32(a))*uint64(uint32(b)) + uint64(uint32(c))") + g.writeLine("\treturn int(int32(uint32(r >> 32))), int(int32(uint32(r)))") + g.writeLine("}") + g.writeLine("") + g.writeLine("func _LONGDIV(hi, lo, divisor int) (int, int) {") + g.writeLine("\tn := (uint64(uint32(hi)) << 32) | uint64(uint32(lo))") + g.writeLine("\td := uint64(uint32(divisor))") + g.writeLine("\tif d == 0 { panic(\"LONGDIV: division by zero\") }") + g.writeLine("\treturn int(int32(uint32(n / d))), int(int32(uint32(n % d)))") + g.writeLine("}") + g.writeLine("") + g.writeLine("func _LONGSUM(a, b, carry int) (int, int) {") + g.writeLine("\tr := uint64(uint32(a)) + uint64(uint32(b)) + uint64(uint32(carry))") + g.writeLine("\treturn int(int32(uint32(r >> 32))), int(int32(uint32(r)))") + g.writeLine("}") + g.writeLine("") + g.writeLine("func _LONGDIFF(a, b, borrow int) (int, int) {") + g.writeLine("\tr := uint64(uint32(a)) - uint64(uint32(b)) - uint64(uint32(borrow))") + g.writeLine("\tif uint32(a) >= uint32(b)+uint32(borrow) { return 0, int(int32(uint32(r))) }") + g.writeLine("\treturn 1, int(int32(uint32(r)))") + g.writeLine("}") + g.writeLine("") + g.writeLine("func _NORMALISE(hi, lo int) (int, int, int) {") + g.writeLine("\tv := (uint64(uint32(hi)) << 32) | uint64(uint32(lo))") + g.writeLine("\tif v == 0 { return 64, 0, 0 }") + g.writeLine("\tn := bits.LeadingZeros64(v)") + g.writeLine("\tv <<= uint(n)") + g.writeLine("\treturn n, int(int32(uint32(v >> 32))), int(int32(uint32(v)))") + g.writeLine("}") + g.writeLine("") + g.writeLine("func _SHIFTRIGHT(hi, lo, n int) (int, int) {") + g.writeLine("\tv := (uint64(uint32(hi)) << 32) | uint64(uint32(lo))") + g.writeLine("\tv >>= uint(uint32(n))") + g.writeLine("\treturn int(int32(uint32(v >> 32))), int(int32(uint32(v)))") + g.writeLine("}") + g.writeLine("") + 
g.writeLine("func _SHIFTLEFT(hi, lo, n int) (int, int) {") + g.writeLine("\tv := (uint64(uint32(hi)) << 32) | uint64(uint32(lo))") + g.writeLine("\tv <<= uint(uint32(n))") + g.writeLine("\treturn int(int32(uint32(v >> 32))), int(int32(uint32(v)))") + g.writeLine("}") + g.writeLine("") +} diff --git a/codegen/codegen_test.go b/codegen/codegen_test.go index ecd7d7d..d4610ee 100644 --- a/codegen/codegen_test.go +++ b/codegen/codegen_test.go @@ -612,3 +612,47 @@ func TestMultiAssignmentMixed(t *testing.T) { t.Errorf("expected 'a, x[0] = 1, 2' in output, got:\n%s", output) } } + +func TestArrayLiteralCodegen(t *testing.T) { + input := `VAL x IS [10, 20, 30] : +` + output := transpile(t, input) + if !strings.Contains(output, "[]int{10, 20, 30}") { + t.Errorf("expected '[]int{10, 20, 30}' in output, got:\n%s", output) + } +} + +func TestUntypedValCodegen(t *testing.T) { + input := `VAL x IS 42 : +PROC dummy() + SKIP +: +` + output := transpile(t, input) + if !strings.Contains(output, "var x = 42") { + t.Errorf("expected 'var x = 42' in output, got:\n%s", output) + } +} + +func TestCAUSEERROR(t *testing.T) { + input := `PROC main() + CAUSEERROR() +: +` + output := transpile(t, input) + if !strings.Contains(output, `panic("CAUSEERROR")`) { + t.Errorf("expected 'panic(\"CAUSEERROR\")' in output, got:\n%s", output) + } +} + +func TestGoIdentByteReserved(t *testing.T) { + input := `PROC main() + BYTE byte: + byte := 65 +: +` + output := transpile(t, input) + if !strings.Contains(output, "_byte") { + t.Errorf("expected '_byte' in output, got:\n%s", output) + } +} diff --git a/codegen/e2e_phase2_test.go b/codegen/e2e_phase2_test.go new file mode 100644 index 0000000..761aede --- /dev/null +++ b/codegen/e2e_phase2_test.go @@ -0,0 +1,102 @@ +package codegen + +import ( + "os" + "os/exec" + "path/filepath" + "testing" + + "github.com/codeassociates/occam2go/lexer" + "github.com/codeassociates/occam2go/parser" +) + +func TestE2E_UntypedValAbbreviation(t *testing.T) { + occam := `SEQ 
+ VAL x IS 42 : + print.int(x) +` + output := transpileCompileRun(t, occam) + expected := "42\n" + if output != expected { + t.Errorf("expected %q, got %q", expected, output) + } +} + +func TestE2E_ArrayLiteralIndexing(t *testing.T) { + occam := `SEQ + VAL arr IS [10, 20, 30] : + print.int(arr[1]) +` + output := transpileCompileRun(t, occam) + expected := "20\n" + if output != expected { + t.Errorf("expected %q, got %q", expected, output) + } +} + +func TestE2E_MultiLineBooleanIF(t *testing.T) { + occam := `SEQ + INT x: + x := 1 + IF + (x > 0) AND + (x < 10) + print.int(x) + TRUE + print.int(0) +` + output := transpileCompileRun(t, occam) + expected := "1\n" + if output != expected { + t.Errorf("expected %q, got %q", expected, output) + } +} + +func TestE2E_CAUSEERROR(t *testing.T) { + occamSource := `PROC main() + CAUSEERROR() +: +` + // Transpile + l := lexer.New(occamSource) + p := parser.New(l) + program := p.ParseProgram() + + if len(p.Errors()) > 0 { + for _, err := range p.Errors() { + t.Errorf("parser error: %s", err) + } + t.FailNow() + } + + gen := New() + goCode := gen.Generate(program) + + // Create temp directory + tmpDir, err := os.MkdirTemp("", "occam2go-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + // Write Go source + goFile := filepath.Join(tmpDir, "main.go") + if err := os.WriteFile(goFile, []byte(goCode), 0644); err != nil { + t.Fatalf("failed to write Go file: %v", err) + } + + // Compile + binFile := filepath.Join(tmpDir, "main") + compileCmd := exec.Command("go", "build", "-o", binFile, goFile) + compileOutput, err := compileCmd.CombinedOutput() + if err != nil { + t.Fatalf("compilation failed: %v\nOutput: %s\nGo code:\n%s", err, compileOutput, goCode) + } + + // Run — expect non-zero exit code (panic) + runCmd := exec.Command(binFile) + err = runCmd.Run() + if err == nil { + t.Fatalf("expected CAUSEERROR to cause a non-zero exit, but program exited successfully") + } +} diff 
--git a/lexer/lexer.go b/lexer/lexer.go index a7743ac..c58b185 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -17,8 +17,13 @@ type Lexer struct { pendingTokens []Token // tokens to emit before reading more input atLineStart bool - // Parenthesis depth: suppress INDENT/DEDENT/NEWLINE inside (...) + // Parenthesis/bracket depth: suppress INDENT/DEDENT/NEWLINE inside (...) and [...] parenDepth int + + // Last real token type for continuation detection. + // When the last token is a binary operator or :=, NEWLINE and INDENT/DEDENT + // are suppressed on the next line (multi-line expression continuation). + lastTokenType TokenType } func New(input string) *Lexer { @@ -52,6 +57,15 @@ func (l *Lexer) peekChar() byte { } func (l *Lexer) NextToken() Token { + tok := l.nextTokenInner() + // Track last real token type for continuation detection + if tok.Type != NEWLINE && tok.Type != INDENT && tok.Type != DEDENT && tok.Type != EOF { + l.lastTokenType = tok.Type + } + return tok +} + +func (l *Lexer) nextTokenInner() Token { // Return any pending tokens first (from indentation processing) if len(l.pendingTokens) > 0 { tok := l.pendingTokens[0] @@ -65,9 +79,10 @@ func (l *Lexer) NextToken() Token { indent := l.measureIndent() currentIndent := l.indentStack[len(l.indentStack)-1] - if l.parenDepth > 0 { - // Inside parentheses: suppress INDENT/DEDENT tokens - // (don't modify indentStack — resume normal tracking after close paren) + if l.parenDepth > 0 || isContinuationOp(l.lastTokenType) { + // Inside parentheses/brackets or after a continuation operator: + // suppress INDENT/DEDENT tokens + // (don't modify indentStack — resume normal tracking after) } else if indent > currentIndent { l.indentStack = append(l.indentStack, indent) return Token{Type: INDENT, Literal: "", Line: l.line, Column: 1} @@ -101,8 +116,12 @@ func (l *Lexer) NextToken() Token { } tok = l.newToken(RPAREN, l.ch) case '[': + l.parenDepth++ tok = l.newToken(LBRACKET, l.ch) case ']': + if l.parenDepth > 0 
{ + l.parenDepth-- + } tok = l.newToken(RBRACKET, l.ch) case ',': tok = l.newToken(COMMA, l.ch) @@ -217,8 +236,9 @@ func (l *Lexer) NextToken() Token { l.skipToEndOfLine() } } - if l.parenDepth > 0 { - // Inside parentheses: suppress NEWLINE, get next real token + if l.parenDepth > 0 || isContinuationOp(l.lastTokenType) { + // Inside parentheses/brackets or after a continuation operator: + // suppress NEWLINE, get next real token return l.NextToken() } tok = Token{Type: NEWLINE, Literal: "\\n", Line: l.line, Column: l.column} @@ -392,6 +412,22 @@ func isHexDigit(ch byte) bool { return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') } +// isContinuationOp returns true if the given token type, when appearing at the +// end of a line, indicates that the expression continues on the next line. +// This causes NEWLINE and INDENT/DEDENT suppression on the continuation line. +func isContinuationOp(t TokenType) bool { + switch t { + case AND, OR, + PLUS, MINUS, MULTIPLY, DIVIDE, MODULO, + PLUS_KW, MINUS_KW, TIMES, + EQ, NEQ, LT, GT, LE, GE, + BITAND, BITOR, BITXOR, LSHIFT, RSHIFT, + ASSIGN, AFTER: + return true + } + return false +} + // Tokenize returns all tokens from the input func Tokenize(input string) []Token { // Ensure input ends with newline for consistent processing diff --git a/lexer/token.go b/lexer/token.go index 5e5cddf..0bafe0f 100644 --- a/lexer/token.go +++ b/lexer/token.go @@ -90,6 +90,7 @@ const ( MOSTNEG_KW MOSTPOS_KW INITIAL + RETYPES // RETYPES (bit-level type reinterpretation) PLUS_KW // PLUS (modular addition keyword, distinct from + symbol) MINUS_KW // MINUS (modular subtraction keyword, distinct from - symbol) TIMES // TIMES (modular multiplication keyword) @@ -178,6 +179,7 @@ var tokenNames = map[TokenType]string{ MOSTNEG_KW: "MOSTNEG", MOSTPOS_KW: "MOSTPOS", INITIAL: "INITIAL", + RETYPES: "RETYPES", PLUS_KW: "PLUS", MINUS_KW: "MINUS", TIMES: "TIMES", @@ -224,6 +226,7 @@ var keywords = map[string]TokenType{ 
"MOSTNEG": MOSTNEG_KW, "MOSTPOS": MOSTPOS_KW, "INITIAL": INITIAL, + "RETYPES": RETYPES, "PLUS": PLUS_KW, "MINUS": MINUS_KW, "TIMES": TIMES, diff --git a/parser/parser.go b/parser/parser.go index d81d674..22e63cb 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -312,10 +312,14 @@ func (p *Parser) parseVarDeclOrAbbreviation() ast.Statement { return decl } -// parseAbbreviation parses a VAL abbreviation: VAL INT x IS expr: -// Also handles VAL []BYTE x IS "string": (open array abbreviation) +// parseAbbreviation parses VAL abbreviations: +// VAL INT x IS expr: (typed VAL abbreviation) +// VAL []BYTE x IS "string": (open array abbreviation) +// VAL x IS expr: (untyped VAL abbreviation) +// VAL INT X RETYPES X : (RETYPES declaration) +// VAL [n]INT X RETYPES X : (array RETYPES declaration) // Current token is VAL. -func (p *Parser) parseAbbreviation() *ast.Abbreviation { +func (p *Parser) parseAbbreviation() ast.Statement { token := p.curToken // VAL token p.nextToken() @@ -328,6 +332,39 @@ func (p *Parser) parseAbbreviation() *ast.Abbreviation { p.nextToken() // move to type } + // Check for [n]TYPE (fixed-size array, used in RETYPES) + isArray := false + var arraySize ast.Expression + if !isOpenArray && p.curTokenIs(lexer.LBRACKET) { + // Could be [n]TYPE name RETYPES ... 
+ isArray = true + p.nextToken() // move past [ + arraySize = p.parseExpression(LOWEST) + if !p.expectPeek(lexer.RBRACKET) { + return nil + } + p.nextToken() // move to type + } + + // Check for untyped VAL abbreviation: VAL name IS expr : + // Detect: curToken is IDENT and peekToken is IS (no type keyword) + if !isOpenArray && !isArray && p.curTokenIs(lexer.IDENT) && p.peekTokenIs(lexer.IS) { + name := p.curToken.Literal + p.nextToken() // consume IS + p.nextToken() // move to expression + value := p.parseExpression(LOWEST) + if !p.expectPeek(lexer.COLON) { + return nil + } + return &ast.Abbreviation{ + Token: token, + IsVal: true, + Type: "", + Name: name, + Value: value, + } + } + // Expect a type keyword if !isTypeToken(p.curToken.Type) { p.addError(fmt.Sprintf("expected type after VAL, got %s", p.curToken.Type)) @@ -341,6 +378,27 @@ func (p *Parser) parseAbbreviation() *ast.Abbreviation { } name := p.curToken.Literal + // Check for RETYPES (instead of IS) + if p.peekTokenIs(lexer.RETYPES) { + p.nextToken() // consume RETYPES + if !p.expectPeek(lexer.IDENT) { + return nil + } + source := p.curToken.Literal + if !p.expectPeek(lexer.COLON) { + return nil + } + return &ast.RetypesDecl{ + Token: token, + IsVal: true, + TargetType: typeName, + IsArray: isArray, + ArraySize: arraySize, + Name: name, + Source: source, + } + } + // Expect IS if !p.expectPeek(lexer.IS) { return nil @@ -2589,34 +2647,60 @@ func (p *Parser) parseExpression(precedence int) ast.Expression { Right: p.parseExpression(PREFIX), } case lexer.LBRACKET: - // Slice expression: [arr FROM start FOR length] or [arr FOR length] + // Could be: [arr FROM start FOR length], [arr FOR length], or [expr, expr, ...] 
array literal lbracket := p.curToken p.nextToken() // move past [ - arrayExpr := p.parseExpression(LOWEST) - var startExpr ast.Expression - if p.peekTokenIs(lexer.FOR) { - // [arr FOR length] shorthand — start is 0 - startExpr = &ast.IntegerLiteral{Token: lexer.Token{Type: lexer.INT, Literal: "0"}, Value: 0} + firstExpr := p.parseExpression(LOWEST) + + if p.peekTokenIs(lexer.COMMA) { + // Array literal: [expr, expr, ...] + elements := []ast.Expression{firstExpr} + for p.peekTokenIs(lexer.COMMA) { + p.nextToken() // consume comma + p.nextToken() // move to next element + elements = append(elements, p.parseExpression(LOWEST)) + } + if !p.expectPeek(lexer.RBRACKET) { + return nil + } + left = &ast.ArrayLiteral{ + Token: lbracket, + Elements: elements, + } + } else if p.peekTokenIs(lexer.RBRACKET) { + // Single-element array literal: [expr] + p.nextToken() // consume ] + left = &ast.ArrayLiteral{ + Token: lbracket, + Elements: []ast.Expression{firstExpr}, + } } else { - if !p.expectPeek(lexer.FROM) { + // Slice expression: [arr FROM start FOR length] or [arr FOR length] + var startExpr ast.Expression + if p.peekTokenIs(lexer.FOR) { + // [arr FOR length] shorthand — start is 0 + startExpr = &ast.IntegerLiteral{Token: lexer.Token{Type: lexer.INT, Literal: "0"}, Value: 0} + } else { + if !p.expectPeek(lexer.FROM) { + return nil + } + p.nextToken() // move past FROM + startExpr = p.parseExpression(LOWEST) + } + if !p.expectPeek(lexer.FOR) { return nil } - p.nextToken() // move past FROM - startExpr = p.parseExpression(LOWEST) - } - if !p.expectPeek(lexer.FOR) { - return nil - } - p.nextToken() // move past FOR - lengthExpr := p.parseExpression(LOWEST) - if !p.expectPeek(lexer.RBRACKET) { - return nil - } - left = &ast.SliceExpr{ - Token: lbracket, - Array: arrayExpr, - Start: startExpr, - Length: lengthExpr, + p.nextToken() // move past FOR + lengthExpr := p.parseExpression(LOWEST) + if !p.expectPeek(lexer.RBRACKET) { + return nil + } + left = &ast.SliceExpr{ + Token: 
lbracket, + Array: firstExpr, + Start: startExpr, + Length: lengthExpr, + } } case lexer.SIZE_KW: token := p.curToken diff --git a/parser/parser_test.go b/parser/parser_test.go index 87eece9..dd34074 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -2873,3 +2873,213 @@ func TestChannelDirAtCallSite(t *testing.T) { t.Errorf("expected arg 1 = 'in', got %q", arg1.Value) } } + +func TestUntypedValAbbreviation(t *testing.T) { + input := `VAL x IS 42 : +` + l := lexer.New(input) + p := New(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("expected 1 statement, got %d", len(program.Statements)) + } + + abbr, ok := program.Statements[0].(*ast.Abbreviation) + if !ok { + t.Fatalf("expected Abbreviation, got %T", program.Statements[0]) + } + + if !abbr.IsVal { + t.Error("expected IsVal to be true") + } + if abbr.Type != "" { + t.Errorf("expected empty type, got %q", abbr.Type) + } + if abbr.Name != "x" { + t.Errorf("expected name 'x', got %s", abbr.Name) + } + if abbr.Value == nil { + t.Fatal("expected non-nil Value") + } + lit, ok := abbr.Value.(*ast.IntegerLiteral) + if !ok { + t.Fatalf("expected IntegerLiteral, got %T", abbr.Value) + } + if lit.Value != 42 { + t.Errorf("expected value 42, got %d", lit.Value) + } +} + +func TestArrayLiteral(t *testing.T) { + input := `VAL x IS [1, 2, 3] : +` + l := lexer.New(input) + p := New(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("expected 1 statement, got %d", len(program.Statements)) + } + + abbr, ok := program.Statements[0].(*ast.Abbreviation) + if !ok { + t.Fatalf("expected Abbreviation, got %T", program.Statements[0]) + } + + if abbr.Value == nil { + t.Fatal("expected non-nil Value") + } + + arr, ok := abbr.Value.(*ast.ArrayLiteral) + if !ok { + t.Fatalf("expected ArrayLiteral, got %T", abbr.Value) + } + + if len(arr.Elements) != 3 { + t.Fatalf("expected 3 elements, got %d", 
len(arr.Elements)) + } + + for i, expected := range []int64{1, 2, 3} { + lit, ok := arr.Elements[i].(*ast.IntegerLiteral) + if !ok { + t.Fatalf("element %d: expected IntegerLiteral, got %T", i, arr.Elements[i]) + } + if lit.Value != expected { + t.Errorf("element %d: expected %d, got %d", i, expected, lit.Value) + } + } +} + +func TestRetypesDecl(t *testing.T) { + input := `VAL INT X RETYPES Y : +` + l := lexer.New(input) + p := New(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("expected 1 statement, got %d", len(program.Statements)) + } + + rt, ok := program.Statements[0].(*ast.RetypesDecl) + if !ok { + t.Fatalf("expected RetypesDecl, got %T", program.Statements[0]) + } + + if !rt.IsVal { + t.Error("expected IsVal to be true") + } + if rt.TargetType != "INT" { + t.Errorf("expected TargetType 'INT', got %q", rt.TargetType) + } + if rt.Name != "X" { + t.Errorf("expected Name 'X', got %q", rt.Name) + } + if rt.Source != "Y" { + t.Errorf("expected Source 'Y', got %q", rt.Source) + } + if rt.IsArray { + t.Error("expected IsArray to be false") + } +} + +func TestRetypesDeclArray(t *testing.T) { + input := `VAL [2]INT X RETYPES Y : +` + l := lexer.New(input) + p := New(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("expected 1 statement, got %d", len(program.Statements)) + } + + rt, ok := program.Statements[0].(*ast.RetypesDecl) + if !ok { + t.Fatalf("expected RetypesDecl, got %T", program.Statements[0]) + } + + if !rt.IsVal { + t.Error("expected IsVal to be true") + } + if rt.TargetType != "INT" { + t.Errorf("expected TargetType 'INT', got %q", rt.TargetType) + } + if rt.Name != "X" { + t.Errorf("expected Name 'X', got %q", rt.Name) + } + if rt.Source != "Y" { + t.Errorf("expected Source 'Y', got %q", rt.Source) + } + if !rt.IsArray { + t.Error("expected IsArray to be true") + } + if rt.ArraySize == nil { + t.Fatal("expected non-nil ArraySize") + } 
+ sizelit, ok := rt.ArraySize.(*ast.IntegerLiteral) + if !ok { + t.Fatalf("expected IntegerLiteral for ArraySize, got %T", rt.ArraySize) + } + if sizelit.Value != 2 { + t.Errorf("expected ArraySize 2, got %d", sizelit.Value) + } +} + +func TestMultiLineBooleanExpression(t *testing.T) { + input := `PROC test() + INT x: + IF + TRUE AND + TRUE + x := 1 + TRUE + x := 2 +: +` + l := lexer.New(input) + p := New(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("expected 1 statement, got %d", len(program.Statements)) + } + + proc, ok := program.Statements[0].(*ast.ProcDecl) + if !ok { + t.Fatalf("expected ProcDecl, got %T", program.Statements[0]) + } + + // Body should have VarDecl + IfStatement + if len(proc.Body) < 2 { + t.Fatalf("expected at least 2 body statements, got %d", len(proc.Body)) + } + + if _, ok := proc.Body[0].(*ast.VarDecl); !ok { + t.Errorf("expected VarDecl at index 0, got %T", proc.Body[0]) + } + + ifStmt, ok := proc.Body[1].(*ast.IfStatement) + if !ok { + t.Fatalf("expected IfStatement at index 1, got %T", proc.Body[1]) + } + + if len(ifStmt.Choices) != 2 { + t.Fatalf("expected 2 choices, got %d", len(ifStmt.Choices)) + } + + // First choice condition should be a BinaryExpr (TRUE AND TRUE) + binExpr, ok := ifStmt.Choices[0].Condition.(*ast.BinaryExpr) + if !ok { + t.Fatalf("expected BinaryExpr for first choice condition, got %T", ifStmt.Choices[0].Condition) + } + if binExpr.Operator != "AND" { + t.Errorf("expected operator 'AND', got %q", binExpr.Operator) + } +}