Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# CI workflow: installs the toolchain, fetches Limine, builds via `make all-user`,
# runs `make smoke`, and uploads any QEMU logs found under build/.
name: CI

# Triggers: pushes to main or any codex/** branch, and every pull request.
on:
push:
branches:
- main
- codex/**
pull_request:

jobs:
build-and-smoke:
runs-on: ubuntu-latest
# Cap the whole job at 30 minutes.
timeout-minutes: 30
steps:
- name: Checkout
uses: actions/checkout@v4

# Compiler, assembler, emulator, and ISO tooling used by the build and smoke steps.
- name: Install toolchain
run: |
sudo apt-get update
sudo apt-get install -y \
gcc-x86-64-linux-gnu \
nasm \
qemu-system-x86 \
xorriso

# Runs the Makefile's `limine` target before the build.
- name: Fetch Limine boot files
run: make limine

# Parallel build using one job per available processor (`nproc`).
- name: Build
run: make all-user -j"$(nproc)"

# TIMEOUT_SECONDS is passed through the environment; presumably consumed by
# the smoke script invoked by `make smoke` -- confirm against the script.
- name: Smoke
run: TIMEOUT_SECONDS=90 make smoke

# `if: always()` uploads logs even when an earlier step failed;
# `if-no-files-found: ignore` keeps this step from failing when no log matches.
- name: Upload QEMU logs
if: always()
uses: actions/upload-artifact@v4
with:
name: qemu-logs
path: build/**/*.log
if-no-files-found: ignore
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,7 @@ core
# Temporary files
*.tmp
tmp/

# Secrets
.env
.env.*
42 changes: 36 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -123,24 +123,37 @@ $(ISO): $(BUILD_DIR)/$(KERNEL) $(SERVER_BINS) limine.conf
@cp $(BUILD_DIR)/cat.elf $(ISO_DIR)/boot/ 2>/dev/null || true
@cp $(BUILD_DIR)/ls.elf $(ISO_DIR)/boot/ 2>/dev/null || true
@cp limine.conf $(ISO_DIR)/boot/
@# Try to find limine in common locations
@if [ -d "/usr/share/limine" ]; then \
@# Try to find Limine boot files in common locations
@if [ -f "/usr/share/limine/limine-bios.sys" ] && \
[ -f "/usr/share/limine/limine-bios-cd.bin" ] && \
[ -f "/usr/share/limine/limine-uefi-cd.bin" ]; then \
cp /usr/share/limine/limine-bios.sys $(ISO_DIR)/boot/; \
cp /usr/share/limine/limine-bios-cd.bin $(ISO_DIR)/boot/; \
cp /usr/share/limine/limine-uefi-cd.bin $(ISO_DIR)/boot/; \
elif [ -d "limine" ]; then \
elif [ -f "limine/limine-bios.sys" ] && \
[ -f "limine/limine-bios-cd.bin" ] && \
[ -f "limine/limine-uefi-cd.bin" ]; then \
cp limine/limine-bios.sys $(ISO_DIR)/boot/; \
cp limine/limine-bios-cd.bin $(ISO_DIR)/boot/; \
cp limine/limine-uefi-cd.bin $(ISO_DIR)/boot/; \
else \
echo "Warning: Limine not found, ISO may not be bootable"; \
echo "Error: Limine boot files not found."; \
echo "Run 'make limine' or install Limine files under /usr/share/limine."; \
exit 1; \
fi
@if ! command -v xorriso >/dev/null 2>&1; then \
echo "Error: xorriso not found. Install xorriso to build bootable ISOs."; \
exit 1; \
fi
@xorriso -as mkisofs -b boot/limine-bios-cd.bin \
-no-emul-boot -boot-load-size 4 -boot-info-table \
--efi-boot boot/limine-uefi-cd.bin \
-efi-boot-part --efi-boot-image --protective-msdos-label \
$(ISO_DIR) -o $@ 2>/dev/null || \
echo "Note: xorriso not found. Install it for ISO creation."
$(ISO_DIR) -o $@ 2>/dev/null
@if [ ! -f "$@" ]; then \
echo "Error: ISO creation failed; output file '$@' not found."; \
exit 1; \
fi
@if [ -f "/usr/bin/limine" ]; then \
limine bios-install $@ 2>/dev/null || true; \
elif [ -f "limine/limine" ]; then \
Expand Down Expand Up @@ -253,6 +266,20 @@ compile_commands:
@echo "" >> compile_commands.json
@echo "]" >> compile_commands.json

# smoke: requires the ISO, then runs scripts/qemu_smoke.sh.
.PHONY: smoke
smoke: $(ISO)
@echo "Running QEMU smoke checks..."
@./scripts/qemu_smoke.sh

# stress: requires the ISO, then runs scripts/qemu_stress.sh.
# ITERATIONS is taken from the environment and defaults to 5; the `$$` passes a
# literal `$` to the shell so the default is applied by the shell, not by Make.
.PHONY: stress
stress: $(ISO)
@echo "Running QEMU stress checks..."
@ITERATIONS=$${ITERATIONS:-5} ./scripts/qemu_stress.sh

# check: aggregate target depending on all-user and smoke; prints a
# confirmation once both prerequisites have succeeded.
.PHONY: check
check: all-user smoke
@echo "Validation check passed"

# Help
.PHONY: help
help:
Expand All @@ -265,6 +292,9 @@ help:
@echo " run-window Run with serial on PTY (use screen to connect)"
@echo " debug Run in QEMU with GDB server"
@echo " run-kernel Run kernel directly (no ISO)"
@echo " smoke Run deterministic QEMU smoke checks"
@echo " stress Run repeated QEMU smoke checks"
@echo " check Build and run smoke checks"
@echo " clean Remove build artifacts"
@echo " limine Download Limine bootloader"
@echo " info Show build configuration"
Expand Down
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ sudo apt install gcc-x86-64-linux-gnu nasm xorriso qemu-system-x86
# Build bootable ISO
make

# Build kernel + userspace + ISO explicitly
make all-user

# Run in QEMU
make run

Expand All @@ -68,6 +71,11 @@ make clean

# Show build configuration
make info

# Run deterministic validation gates
make smoke
make stress
make check
```

## Project Structure
Expand Down Expand Up @@ -144,13 +152,21 @@ Ocean includes an interactive shell with the following built-in commands:

External commands (like `ls`) are loaded from boot modules and executed via fork/exec.

## Docs

- [docs/STATUS.md](docs/STATUS.md) - Current snapshot and roadmap
- [docs/CODEX_WORKFLOW.md](docs/CODEX_WORKFLOW.md) - Codex development workflow

## Development

### Running Tests

```bash
# Run in QEMU with serial output
make run
# Build and run deterministic smoke test
make check

# Run stress loop (repeated boots)
make stress

# Debug with GDB
make debug
Expand Down
39 changes: 39 additions & 0 deletions docs/ARCH_V2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Ocean Architecture v2 (In-Progress)

## Goals
- Keep kernel minimal while hardening safety boundaries.
- Move from ad hoc pointer handling to explicit validated user-memory access.
- Establish deterministic validation gates for every kernel/userspace change.

## Kernel Boundaries
1. Syscall boundary
- All user pointers must flow through `uaccess` helpers.
- Syscall handlers return negative errno-style values for failure.

2. IPC boundary
- Endpoints are refcounted objects with explicit dead/listed state.
- Endpoint destruction is asynchronous with respect to outstanding references.
- Wait queue nodes are allocation-backed and not stack-persistent across sleep.

3. Scheduler boundary
- Channel sleep/wakeup is implemented via global thread registry.
- Sleepers are woken by channel identity and transitioned through scheduler APIs only.

4. Process boundary
- Parent/child wait semantics include real reaping and resource teardown.
- Children are reparented to init on parent exit.

5. Memory boundary
- VMM page accounting tracks actual mapping/unmapping.
- Slab and page allocator interaction uses page flags (`PG_SLAB`, compound head/order) to free correctly.

## Validation Contract
- `make all-user`: compile kernel + userspace + ISO.
- `make smoke`: deterministic boot + init signatures, panic/fault signature scan.
- `make stress`: repeated smoke cycles.
- CI (`.github/workflows/ci.yml`) runs build + smoke and archives serial logs.

## Current Non-Goals
- Full SMP correctness and per-CPU scheduler isolation.
- Full capability transfer/cspace enforcement.
- Fully wired production-grade VFS/block/filesystem pipeline.
44 changes: 44 additions & 0 deletions docs/AUDIT_2026-02.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Ocean Deep Audit (February 2026)

## Scope
- Full repository static audit across kernel, userspace servers, drivers, and shared headers.
- Correctness-first remediation of critical kernel defects that could crash or corrupt runtime state.
- Validation tooling expansion to support repeatable build + smoke + stress gates.

## Critical Findings and Fixes
1. User pointer safety gaps in syscall handlers
- Risk: direct dereference of user pointers in syscall paths (`read`, `write`, `debug_print`, `exec`, `wait`, IPC receive output).
- Fix: introduced centralized user access layer in `kernel/mm/uaccess.c` and `kernel/include/ocean/uaccess.h`; syscall handlers now use `copy_from_user`, `copy_to_user`, and `copy_string_from_user`.

2. IPC endpoint lifetime/use-after-free race
- Risk: `endpoint_destroy()` freed endpoints while references could still exist.
- Fix: converted to two-phase teardown: mark dead + remove from global list, free only at final `endpoint_put()` refcount drop.

3. IPC wait object lifetime hazards
- Risk: stack-backed wait objects were queued across scheduler handoffs.
- Fix: wait objects are now heap-allocated and are defensively removed on wakeup paths to avoid dangling queue links.

4. Channel wakeups were effectively non-functional
- Risk: `thread_wakeup()` was a no-op, breaking generic sleep/wake flows.
- Fix: introduced global thread registry and real channel scanning wakeup in scheduler.

5. Process lifecycle leaks and zombie reaping gaps
- Risk: waited children were not fully reclaimed; failed fork paths leaked process objects.
- Fix: added `process_reap()` for full child cleanup, wired into `process_wait()`, and hardened fork failure cleanup.

6. Kernel thread entry argument bug
- Risk: kernel thread creation ignored function arguments.
- Fix: added `kthread_entry` trampoline passing function/arg via saved callee registers.

7. Memory accounting and allocator correctness bugs
- Risk: `vmm_unmap_region()` decremented `total_vm` by requested size instead of actual unmapped pages; slab free path mishandled large allocations.
- Fix: actual unmapped page accounting; slab now tags slab pages via `PG_SLAB` and frees compound allocations with correct order.

8. Page fault handling not integrated into exception path
- Risk: all exceptions halted system before VMM fault resolution could run.
- Fix: IDT exception path now forwards page faults to `page_fault_handler()` and returns on successful resolution.

## Remaining High-Value Work
- Complete IPC `call/reply/reply_recv` semantics and capability transfer enforcement.
- Replace simulated server behavior with end-to-end live IPC-backed operations across mem/proc/vfs/blk.
- Add deeper runtime stress/fault-injection for process and memory churn paths.
76 changes: 76 additions & 0 deletions docs/CODEX_WORKFLOW.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Codex Development Workflow

This document describes a conservative, repeatable workflow for continuing Ocean development with Codex.

**Workflow**
1. Intake
2. Targeted inspection
3. Plan
4. Implement
5. Verify
6. Summarize

**Intake**
- Restate the goal and scope.
- Identify constraints and success criteria.
- Confirm whether to run `make` or QEMU runs.

**Targeted Inspection**
- Locate the relevant subsystem first in `kernel/`, then `servers/`, `lib/`, `drivers/`, `fs/`, and `include/`.
- Read headers and call sites before proposing changes.

**Plan**
- Propose a minimal set of files and changes.
- Call out risks and missing pieces.
- Explicitly note when behavior is simulated or stubbed.

**Implement**
- Prefer small diffs and incremental commits.
- Preserve existing code style and conventions.
- Avoid cross-cutting refactors unless requested.

**Verify**
- Default to no build or QEMU runs unless asked.
- Run `make` when explicitly requested or when the change is large and the user approves.

**Summarize**
- Describe what changed and why.
- Point to key files.
- Note any follow-up risks or gaps.

**Validation Matrix**

| Change Type | Default Action | When to Run `make` | When to Run QEMU |
| --- | --- | --- | --- |
| Documentation-only | No build | Only on request | Only on request |
| Kernel changes | Analyze first | With explicit approval | Only on request |
| Userspace server changes | Analyze first | With explicit approval | Only on request |
| Boot configuration changes | Analyze first | With explicit approval | Only on request |

**Change Checklists**

New syscall
- Add number and documentation in `kernel/include/ocean/syscall.h` and `lib/libocean/include/ocean/syscall.h`.
- Implement handler in `kernel/syscall/dispatch.c`.
- Add user pointer validation if user memory is touched.
- Update any userspace wrappers in `lib/libocean`.

IPC protocol change
- Update `include/ocean/ipc_proto.h`.
- Update server implementations that use the protocol.
- Check kernel IPC fast path or endpoint semantics if changed.

New server or driver
- Add source under `servers/` or `drivers/` or `fs/`.
- Add build rules in `user.mk`.
- Decide if it should be loaded as a boot module in `limine.conf`.
- Register well-known endpoints if applicable.

Boot module changes
- Update `limine.conf` to add or remove modules.
- Ensure `Makefile` copies the module into the ISO.

**Failure Handling**
- If the toolchain is missing, report the exact missing component and reference `tools/setup-toolchain.sh`.
- If QEMU or xorriso is missing, explain the limitation and continue with code changes.
- If a change cannot be verified locally, document the gap and a suggested command for the user to run.
38 changes: 38 additions & 0 deletions docs/STATUS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Project Status

Snapshot date: February 15, 2026

**Snapshot**
Ocean is an educational x86_64 microkernel with a working boot path, basic kernel subsystems, and a small userspace. The kernel boots via Limine into a higher-half layout, initializes CPU, memory, scheduler, IPC, and syscalls, then starts init and the shell from boot modules. This snapshot includes significant kernel safety hardening: centralized user-pointer validation, improved endpoint lifetime management, functional channel wakeups, and deterministic QEMU smoke/stress tooling.

**What Works**
- Boot and arch: Limine boot, higher-half kernel, early serial console, GDT/TSS, IDT/ISR, PIT timer, SYSCALL entry, PIC remap.
- Memory: PMM with bitmap and buddy allocator; VMM with VMAs and paging; kernel heap via slab.
- Scheduler: O(1) priority queues, preemptive tick, single-CPU only with per-CPU scaffolding.
- Processes: basic process and thread structs, fork/exec/wait path, kernel threads.
- IPC: endpoints and synchronous send/recv with fast path.
- Syscall safety: user buffer/string access now goes through kernel `uaccess` helpers.
- Process lifecycle: waited children are reaped with resource cleanup.
- Validation tooling: `make smoke`, `make stress`, and CI smoke workflow.
- Syscalls: minimal set; read and write backed by serial I/O.
- Userspace: minimal libc, init server, shell, and small utilities.

**What Is Stubbed or Simulated**
- IPC call/reply semantics, capability transfer, and cspace integration.
- Process lifecycle beyond reaping (signals, multithreaded exit edge cases).
- Memory server, process server, VFS server, block server, and drivers are simulated and do not yet perform real kernel-mediated operations.
- Filesystem drivers and block drivers are not wired into live IPC or VFS routing.
- `limine.conf` loads only init, the shell, and a few utilities as boot modules.

**Kernel-first Improvements**
- Complete IPC reply/call semantics, including reply endpoints and tracking caller context.
- Complete capability transfer and cspace enforcement for endpoints and other objects.
- Finish process lifecycle behavior beyond wait/reap (signals, multithread edge cases).
- Continue memory correctness work: page refcounting and COW teardown.
- Harden scheduler edge cases and build toward real SMP enablement.

**Secondary Improvements**
- Wire init to actually spawn services and register well-known endpoints.
- Implement real IPC request/response loops in mem, proc, vfs, and blk servers.
- Integrate filesystem drivers with VFS and block server.
- Extend developer tooling: repeatable QEMU run configs, compile_commands generation in CI, and broader coverage on top of the existing smoke tests.
2 changes: 2 additions & 0 deletions include/ocean/ipc_proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

#include <stdint.h>

#define IPC_PROTO_VERSION 2

/*
* Message tag format (64 bits):
* [63:44] Label - User-defined message type (20 bits)
Expand Down
6 changes: 6 additions & 0 deletions kernel/arch/x86_64/interrupt/idt.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ void idt_init(void)
*/
void exception_handler(struct trap_frame *frame)
{
if (frame->int_no == VEC_PAGE_FAULT) {
extern void page_fault_handler(u64 error_code);
page_fault_handler(frame->error_code);
return;
}

const char *name = "Unknown";

if (frame->int_no < 32) {
Expand Down
Loading