From 4820ccc4bd8d3e60e606c9ab1ffc5211ed299c34 Mon Sep 17 00:00:00 2001 From: Associate 1 Date: Thu, 19 Feb 2026 22:53:16 -0700 Subject: [PATCH] Enable running course module programs end-to-end Convert occam string escape sequences (*c, *n, *t, *s, **, *', *") to real bytes in the parser, and generate a main() harness for PROCs with the standard entry point signature (CHAN BYTE keyboard?, screen!, error!) that wires stdin/stdout/stderr to byte channels. Programs using the KRoC course module can now be transpiled, compiled, and run. Co-Authored-By: Claude Opus 4.6 --- README.md | 65 +++++++++++- codegen/codegen.go | 209 ++++++++++++++++++++++++++++++------- codegen/codegen_test.go | 11 ++ codegen/e2e_course_test.go | 96 +++++++++++++++++ examples/course_hello.occ | 36 +++++++ parser/parser.go | 37 ++++++- parser/parser_test.go | 39 +++++++ 7 files changed, 455 insertions(+), 38 deletions(-) create mode 100644 codegen/e2e_course_test.go create mode 100644 examples/course_hello.occ diff --git a/README.md b/README.md index 79f264d..a4da1de 100644 --- a/README.md +++ b/README.md @@ -405,7 +405,70 @@ This outputs: #ENDIF ``` -> **Note:** The preprocessor and module infrastructure is in place, but the KRoC course module source files themselves use several occam features not yet supported by the transpiler (abbreviations like `VAL INT x IS 1:`, `CHAN BYTE` without `OF`, `VAL []BYTE` array slice params, `SIZE`, `:` PROC terminators). Full course module transpilation is a future goal. See [TODO.md](TODO.md) for the implementation roadmap. +### Running Programs with the Course Module + +The KRoC [course module](https://www.cs.kent.ac.uk/projects/ofa/kroc/) is a standard occam library providing I/O utilities (`out.string`, `out.int`, `out.repeat`, etc.) for character-level communication over byte channels. The transpiler fully supports it. + +Occam programs that follow the standard entry point pattern — a PROC with three `CHAN BYTE` parameters `(keyboard?, screen!, error!)` — automatically get a generated `main()` that wires stdin, stdout, and stderr to channels. + +```bash +# 1. Clone the KRoC repository (one-time setup) +./scripts/clone-kroc.sh + +# 2. Build the transpiler +go build -o occam2go + +# 3. Transpile an example that uses the course module +./occam2go -I kroc/modules/course/libsrc \ + -D TARGET.BITS.PER.WORD=32 \ + -o hello.go examples/course_hello.occ + +# 4. Run it +go run hello.go +``` + +Output: +``` +Hello from occam2go! +The answer is: 42 +------------------------------ +Counting: 1, 2, 3, 4, 5 +``` + +The `-I` flag tells the preprocessor where to find the course module source files, and `-D TARGET.BITS.PER.WORD=32` sets the word size expected by the course module (the transpiler defaults to 64). + +The example program (`examples/course_hello.occ`): +```occam +#INCLUDE "course.module" + +PROC hello (CHAN BYTE keyboard?, screen!, error!) + SEQ + out.string ("Hello from occam2go!*c*n", 0, screen!) + out.string ("The answer is: ", 0, screen!) + out.int (42, 0, screen!) + out.string ("*c*n", 0, screen!) + out.repeat ('-', 30, screen!) + out.string ("*c*n", 0, screen!) + out.string ("Counting: ", 0, screen!) + SEQ i = 1 FOR 5 + SEQ + IF + i > 1 + out.string (", ", 0, screen!) + TRUE + SKIP + out.int (i, 0, screen!) + out.string ("*c*n", 0, screen!) +: +``` + +You can also transpile the KRoC examples directly: +```bash +./occam2go -I kroc/modules/course/libsrc \ + -D TARGET.BITS.PER.WORD=32 \ + -o hello_world.go kroc/modules/course/examples/hello_world.occ +go run hello_world.go +``` ## How Channels are Mapped diff --git a/codegen/codegen.go b/codegen/codegen.go index e994e39..d9b2d69 100644 --- a/codegen/codegen.go +++ b/codegen/codegen.go @@ -17,6 +17,7 @@ type Generator struct { needOs bool // track if we need os package import needMath bool // track if we need math package import needMathBits bool // track if we need math/bits package import + needBufio bool // track if we need bufio package import // Track procedure signatures for proper pointer handling procSigs map[string][]ast.ProcParam @@ -93,6 +94,7 @@ func (g *Generator) Generate(program *ast.Program) string { g.needOs = false g.needMath = false g.needMathBits = false + g.needBufio = false g.procSigs = make(map[string][]ast.ProcParam) g.refParams = make(map[string]bool) g.protocolDefs = make(map[string]*ast.ProtocolDecl) @@ -142,42 +144,6 @@ func (g *Generator) Generate(program *ast.Program) string { g.collectRecordVars(stmt) } - // Write package declaration - g.writeLine("package main") - g.writeLine("") - - // Write imports - if g.needSync || g.needFmt || g.needTime || g.needOs || g.needMath || g.needMathBits { - g.writeLine("import (") - g.indent++ - if g.needFmt { - g.writeLine(`"fmt"`) - } - if g.needMath { - g.writeLine(`"math"`) - } - if g.needMathBits { - g.writeLine(`"math/bits"`) - } - if g.needOs { - g.writeLine(`"os"`) - } - if g.needSync { - g.writeLine(`"sync"`) - } - if g.needTime { - g.writeLine(`"time"`) - } - g.indent-- - g.writeLine(")") - g.writeLine("") - } - - // Emit transputer intrinsic helper functions - if g.needMathBits { - g.emitIntrinsicHelpers() - } - // Separate protocol, record, procedure declarations from other statements var typeDecls []ast.Statement var procDecls []ast.Statement @@ -220,6 +186,56 @@ func (g *Generator) Generate(program *ast.Program) string { } } + // Detect entry point PROC so we can set import flags before writing imports + var entryProc *ast.ProcDecl + if len(mainStatements) == 0 { + entryProc = g.findEntryProc(procDecls) + if entryProc != nil { + g.needOs = true + g.needSync = true + g.needBufio = true + } + } + + // Write package declaration + g.writeLine("package main") + g.writeLine("") + + // Write imports + if g.needSync || g.needFmt || g.needTime || g.needOs || g.needMath || g.needMathBits || g.needBufio { + g.writeLine("import (") + g.indent++ + if g.needBufio { + g.writeLine(`"bufio"`) + } + if g.needFmt { + g.writeLine(`"fmt"`) + } + if g.needMath { + g.writeLine(`"math"`) + } + if g.needMathBits { + g.writeLine(`"math/bits"`) + } + if g.needOs { + g.writeLine(`"os"`) + } + if g.needSync { + g.writeLine(`"sync"`) + } + if g.needTime { + g.writeLine(`"time"`) + } + g.indent-- + g.writeLine(")") + g.writeLine("") + } + + // Emit transputer intrinsic helper functions + if g.needMathBits { + g.emitIntrinsicHelpers() + } + // Generate type definitions first (at package level) for _, stmt := range typeDecls { g.generateStatement(stmt) @@ -265,6 +281,8 @@ func (g *Generator) Generate(program *ast.Program) string { g.nestingLevel-- g.indent-- g.writeLine("}") + } else if entryProc != nil { + g.generateEntryHarness(entryProc) } return g.builder.String() @@ -336,6 +354,125 @@ func (g *Generator) collectNestedProcSigsScoped(stmts []ast.Statement, oldSigs m } } +// findEntryProc looks for the last top-level PROC with the standard occam +// entry point signature: exactly 3 CHAN OF BYTE params (keyboard?, screen!, error!). +func (g *Generator) findEntryProc(procDecls []ast.Statement) *ast.ProcDecl { + var entry *ast.ProcDecl + for _, stmt := range procDecls { + proc, ok := stmt.(*ast.ProcDecl) + if !ok { + continue + } + if len(proc.Params) != 3 { + continue + } + p0, p1, p2 := proc.Params[0], proc.Params[1], proc.Params[2] + if p0.IsChan && p0.ChanElemType == "BYTE" && p0.ChanDir == "?" && + p1.IsChan && p1.ChanElemType == "BYTE" && p1.ChanDir == "!" && + p2.IsChan && p2.ChanElemType == "BYTE" && p2.ChanDir == "!" { + entry = proc + } + } + return entry +} + +// generateEntryHarness emits a func main() that wires stdin/stdout/stderr +// to channels and calls the entry PROC. +func (g *Generator) generateEntryHarness(proc *ast.ProcDecl) { + name := goIdent(proc.Name) + g.writeLine("func main() {") + g.indent++ + + // Create channels + g.writeLine("keyboard := make(chan byte, 256)") + g.writeLine("screen := make(chan byte, 256)") + g.writeLine("_error := make(chan byte, 256)") + g.writeLine("") + + // WaitGroup for writer goroutines to finish draining + g.writeLine("var wg sync.WaitGroup") + g.writeLine("wg.Add(2)") + g.writeLine("") + + // Screen writer goroutine + g.writeLine("go func() {") + g.indent++ + g.writeLine("defer wg.Done()") + g.writeLine("w := bufio.NewWriter(os.Stdout)") + g.writeLine("for b := range screen {") + g.indent++ + g.writeLine("if b == 255 {") + g.indent++ + g.writeLine("w.Flush()") + g.indent-- + g.writeLine("} else {") + g.indent++ + g.writeLine("w.WriteByte(b)") + g.indent-- + g.writeLine("}") + g.indent-- + g.writeLine("}") + g.writeLine("w.Flush()") + g.indent-- + g.writeLine("}()") + g.writeLine("") + + // Error writer goroutine + g.writeLine("go func() {") + g.indent++ + g.writeLine("defer wg.Done()") + g.writeLine("w := bufio.NewWriter(os.Stderr)") + g.writeLine("for b := range _error {") + g.indent++ + g.writeLine("if b == 255 {") + g.indent++ + g.writeLine("w.Flush()") + g.indent-- + g.writeLine("} else {") + g.indent++ + g.writeLine("w.WriteByte(b)") + g.indent-- + g.writeLine("}") + g.indent-- + g.writeLine("}") + g.writeLine("w.Flush()") + g.indent-- + g.writeLine("}()") + g.writeLine("") + + // Keyboard reader goroutine + g.writeLine("go func() {") + g.indent++ + g.writeLine("r := bufio.NewReader(os.Stdin)") + g.writeLine("for {") + g.indent++ + g.writeLine("b, err := r.ReadByte()") + g.writeLine("if err != nil {") + g.indent++ + g.writeLine("close(keyboard)") + g.writeLine("return") + g.indent-- + g.writeLine("}") + g.writeLine("keyboard <- b") + g.indent-- + g.writeLine("}") + g.indent-- + g.writeLine("}()") + g.writeLine("") + + // Call the entry proc + g.writeLine(fmt.Sprintf("%s(keyboard, screen, _error)", name)) + g.writeLine("") + + // Close output channels and wait for writers to drain + g.writeLine("close(screen)") + g.writeLine("close(_error)") + g.writeLine("wg.Wait()") + + g.indent-- + g.writeLine("}") +} + func (g *Generator) containsPar(stmt ast.Statement) bool { switch s := stmt.(type) { case *ast.ParBlock: diff --git a/codegen/codegen_test.go b/codegen/codegen_test.go index d4610ee..d911685 100644 --- a/codegen/codegen_test.go +++ b/codegen/codegen_test.go @@ -250,6 +250,17 @@ func TestStringLiteral(t *testing.T) { } } +func TestStringEscapeCodegen(t *testing.T) { + input := `x := "hello*c*n" +` + output := transpile(t, input) + + // The *c*n should become \r\n in the Go output (via %q formatting) + if !strings.Contains(output, `x = "hello\r\n"`) { + t.Errorf("expected string with \\r\\n escape, got:\n%s", output) + } +} + func TestByteLiteral(t *testing.T) { input := "x := 'A'\n" output := transpile(t, input) diff --git a/codegen/e2e_course_test.go b/codegen/e2e_course_test.go new file mode 100644 index 0000000..9ffdc6c --- /dev/null +++ b/codegen/e2e_course_test.go @@ -0,0 +1,96 @@ +package codegen + +import ( + "os" + "os/exec" + "path/filepath" + "testing" + + "github.com/codeassociates/occam2go/lexer" + "github.com/codeassociates/occam2go/parser" + "github.com/codeassociates/occam2go/preproc" +) + +// transpileCompileRunWithDefines is like transpileCompileRunFromFile but +// accepts preprocessor defines (e.g., TARGET.BITS.PER.WORD=32). +func transpileCompileRunWithDefines(t *testing.T, mainFile string, includePaths []string, defines map[string]string) string { + t.Helper() + + pp := preproc.New(preproc.WithIncludePaths(includePaths), preproc.WithDefines(defines)) + expanded, err := pp.ProcessFile(mainFile) + if err != nil { + t.Fatalf("preprocessor error: %v", err) + } + if len(pp.Errors()) > 0 { + for _, e := range pp.Errors() { + t.Errorf("preprocessor warning: %s", e) + } + } + + // Transpile + l := lexer.New(expanded) + p := parser.New(l) + program := p.ParseProgram() + + if len(p.Errors()) > 0 { + for _, err := range p.Errors() { + t.Errorf("parser error: %s", err) + } + t.FailNow() + } + + gen := New() + goCode := gen.Generate(program) + + // Create temp directory + tmpDir, err := os.MkdirTemp("", "occam2go-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + // Write Go source + goFile := filepath.Join(tmpDir, "main.go") + if err := os.WriteFile(goFile, []byte(goCode), 0644); err != nil { + t.Fatalf("failed to write Go file: %v", err) + } + + // Compile + binFile := filepath.Join(tmpDir, "main") + compileCmd := exec.Command("go", "build", "-o", binFile, goFile) + compileOutput, err := compileCmd.CombinedOutput() + if err != nil { + t.Fatalf("compilation failed: %v\nOutput: %s\nGo code:\n%s", err, compileOutput, goCode) + } + + // Run + runCmd := exec.Command(binFile) + output, err := runCmd.CombinedOutput() + if err != nil { + t.Fatalf("execution failed: %v\nOutput: %s", err, output) + } + + return string(output) +} + +func TestE2E_HelloWorldCourseModule(t *testing.T) { + // Find the kroc directory relative to this test file + krocDir := filepath.Join("..", "kroc", "modules", "course") + mainFile := filepath.Join(krocDir, "examples", "hello_world.occ") + includeDir := filepath.Join(krocDir, "libsrc") + + // Check that the files exist + if _, err := os.Stat(mainFile); os.IsNotExist(err) { + t.Skip("kroc course module not found, skipping") + } + + defines := map[string]string{ + "TARGET.BITS.PER.WORD": "32", + } + + output := transpileCompileRunWithDefines(t, mainFile, []string{includeDir}, defines) + expected := "Hello World\r\n" + if output != expected { + t.Errorf("expected %q, got %q", expected, output) + } +} diff --git a/examples/course_hello.occ b/examples/course_hello.occ new file mode 100644 index 0000000..c3a07e2 --- /dev/null +++ b/examples/course_hello.occ @@ -0,0 +1,36 @@ +-- Hello World example using the KRoC course module. +-- +-- This demonstrates the standard occam entry point pattern: +-- a PROC with three CHAN BYTE parameters (keyboard, screen, error) +-- wired to stdin, stdout, and stderr by the generated main() harness. +-- +-- The course module provides utility PROCs such as out.string, out.int, +-- and out.repeat for character-level I/O on byte channels. +-- +-- To transpile and run: +-- ./occam2go -I kroc/modules/course/libsrc \ +-- -D TARGET.BITS.PER.WORD=32 \ +-- -o hello.go examples/course_hello.occ +-- go run hello.go + +#INCLUDE "course.module" + +PROC hello (CHAN BYTE keyboard?, screen!, error!) + SEQ + out.string ("Hello from occam2go!*c*n", 0, screen!) + out.string ("The answer is: ", 0, screen!) + out.int (42, 0, screen!) + out.string ("*c*n", 0, screen!) + out.repeat ('-', 30, screen!) + out.string ("*c*n", 0, screen!) + out.string ("Counting: ", 0, screen!) + SEQ i = 1 FOR 5 + SEQ + IF + i > 1 + out.string (", ", 0, screen!) + TRUE + SKIP + out.int (i, 0, screen!) + out.string ("*c*n", 0, screen!) +: diff --git a/parser/parser.go b/parser/parser.go index 22e63cb..df08c88 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -2314,6 +2314,41 @@ func (p *Parser) parseFuncDecl() *ast.FuncDecl { return fn } +// convertOccamStringEscapes converts occam escape sequences in string literals +// to their actual byte values. Occam uses *c, *n, *t, *s, **, *", *' as escapes. +func (p *Parser) convertOccamStringEscapes(raw string) string { + var buf strings.Builder + buf.Grow(len(raw)) + for i := 0; i < len(raw); i++ { + if raw[i] == '*' && i+1 < len(raw) { + i++ + switch raw[i] { + case 'n': + buf.WriteByte('\n') + case 'c': + buf.WriteByte('\r') + case 't': + buf.WriteByte('\t') + case 's': + buf.WriteByte(' ') + case '*': + buf.WriteByte('*') + case '"': + buf.WriteByte('"') + case '\'': + buf.WriteByte('\'') + default: + // Unknown escape: pass through as-is + buf.WriteByte('*') + buf.WriteByte(raw[i]) + } + } else { + buf.WriteByte(raw[i]) + } + } + return buf.String() +} + // parseByteLiteralValue processes the raw content of a byte literal (between single quotes), // handling occam escape sequences (* prefix), and returns the resulting byte value. func (p *Parser) parseByteLiteralValue(raw string) (byte, error) { @@ -2608,7 +2643,7 @@ func (p *Parser) parseExpression(precedence int) ast.Expression { case lexer.FALSE: left = &ast.BooleanLiteral{Token: p.curToken, Value: false} case lexer.STRING: - left = &ast.StringLiteral{Token: p.curToken, Value: p.curToken.Literal} + left = &ast.StringLiteral{Token: p.curToken, Value: p.convertOccamStringEscapes(p.curToken.Literal)} case lexer.BYTE_LIT: b, err := p.parseByteLiteralValue(p.curToken.Literal) if err != nil { diff --git a/parser/parser_test.go b/parser/parser_test.go index dd34074..1213381 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1341,6 +1341,45 @@ func TestStringLiteral(t *testing.T) { } } +func TestStringEscapeConversion(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {`x := "hello*n"` + "\n", "hello\n"}, + {`x := "hello*c*n"` + "\n", "hello\r\n"}, + {`x := "*t*s"` + "\n", "\t "}, + {`x := "a**b"` + "\n", "a*b"}, + {`x := "it*'s"` + "\n", "it's"}, + {`x := "no escapes"` + "\n", "no escapes"}, + } + + for _, tt := range tests { + l := lexer.New(tt.input) + p := New(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("input %q: expected 1 statement, got %d", tt.input, len(program.Statements)) + } + + assign, ok := program.Statements[0].(*ast.Assignment) + if !ok { + t.Fatalf("input %q: expected Assignment, got %T", tt.input, program.Statements[0]) + } + + strLit, ok := assign.Value.(*ast.StringLiteral) + if !ok { + t.Fatalf("input %q: expected StringLiteral, got %T", tt.input, assign.Value) + } + + if strLit.Value != tt.expected { + t.Errorf("input %q: expected Value=%q, got %q", tt.input, tt.expected, strLit.Value) + } + } +} + func TestByteLiteral(t *testing.T) { tests := []struct { input string