From 8da78353282729d845beea0eca663e85b21d5137 Mon Sep 17 00:00:00 2001 From: Jack Gaffney Date: Sun, 25 Jan 2026 23:28:02 -0500 Subject: [PATCH 1/4] Remove env files; ignore .env/.env.* --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index c5d1e05..4df5e8d 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,7 @@ core # Temporary files *.tmp tmp/ + +# Secrets +.env +.env.* From 72bdc88520d68817a5d0d0dac1050b75975923cd Mon Sep 17 00:00:00 2001 From: Jack Gaffney Date: Sun, 15 Feb 2026 01:16:59 -0500 Subject: [PATCH 2/4] M1-M3: harden kernel safety, IPC lifetime, and process/memory correctness --- include/ocean/ipc_proto.h | 2 + kernel/arch/x86_64/interrupt/idt.c | 6 + kernel/arch/x86_64/mm/paging.c | 1 + kernel/include/ocean/ipc.h | 1 + kernel/include/ocean/process.h | 1 + kernel/include/ocean/uaccess.h | 36 ++++++ kernel/include/ocean/vmm.h | 4 + kernel/ipc/endpoint.c | 50 ++++++--- kernel/ipc/message.c | 116 ++++++++++--------- kernel/mm/fault.c | 3 +- kernel/mm/slab.c | 58 +++++++--- kernel/mm/uaccess.c | 137 +++++++++++++++++++++++ kernel/mm/vmm.c | 16 ++- kernel/proc/process.c | 172 ++++++++++++++++++++++++----- kernel/sched/core.c | 32 ++++-- kernel/syscall/dispatch.c | 156 ++++++++++++++++++++------ 16 files changed, 628 insertions(+), 163 deletions(-) create mode 100644 kernel/include/ocean/uaccess.h create mode 100644 kernel/mm/uaccess.c diff --git a/include/ocean/ipc_proto.h b/include/ocean/ipc_proto.h index 0fdfc85..4f8d38c 100644 --- a/include/ocean/ipc_proto.h +++ b/include/ocean/ipc_proto.h @@ -9,6 +9,8 @@ #include +#define IPC_PROTO_VERSION 2 + /* * Message tag format (64 bits): * [63:44] Label - User-defined message type (20 bits) diff --git a/kernel/arch/x86_64/interrupt/idt.c b/kernel/arch/x86_64/interrupt/idt.c index a19ce83..c4419c1 100644 --- a/kernel/arch/x86_64/interrupt/idt.c +++ b/kernel/arch/x86_64/interrupt/idt.c @@ -185,6 +185,12 @@ void idt_init(void) */ void exception_handler(struct 
trap_frame *frame) { + if (frame->int_no == VEC_PAGE_FAULT) { + extern void page_fault_handler(u64 error_code); + page_fault_handler(frame->error_code); + return; + } + const char *name = "Unknown"; if (frame->int_no < 32) { diff --git a/kernel/arch/x86_64/mm/paging.c b/kernel/arch/x86_64/mm/paging.c index b66b984..ef85c4e 100644 --- a/kernel/arch/x86_64/mm/paging.c +++ b/kernel/arch/x86_64/mm/paging.c @@ -314,6 +314,7 @@ void paging_switch(struct address_space *as) { if (as && as->pml4_phys) { write_cr3(as->pml4_phys); + vmm_set_current(as); } } diff --git a/kernel/include/ocean/ipc.h b/kernel/include/ocean/ipc.h index ff1c338..c18a554 100644 --- a/kernel/include/ocean/ipc.h +++ b/kernel/include/ocean/ipc.h @@ -139,6 +139,7 @@ struct ipc_endpoint { #define EP_FLAG_REPLY (1 << 1) /* Reply endpoint */ #define EP_FLAG_NOTIFICATION (1 << 2) /* Notification endpoint */ #define EP_FLAG_DEAD (1 << 3) /* Endpoint destroyed */ +#define EP_FLAG_LISTED (1 << 4) /* Present in global endpoint list */ /* * IPC Wait state - saved when thread blocks on IPC diff --git a/kernel/include/ocean/process.h b/kernel/include/ocean/process.h index a582d04..9487742 100644 --- a/kernel/include/ocean/process.h +++ b/kernel/include/ocean/process.h @@ -122,6 +122,7 @@ struct thread { /* Scheduler linkage */ struct list_head run_list; /* Link in run queue */ struct list_head thread_list; /* Link in process's thread list */ + struct list_head all_list; /* Link in global thread list */ /* Wait queue */ struct list_head wait_list; /* Link in wait queue */ diff --git a/kernel/include/ocean/uaccess.h b/kernel/include/ocean/uaccess.h new file mode 100644 index 0000000..941879e --- /dev/null +++ b/kernel/include/ocean/uaccess.h @@ -0,0 +1,36 @@ +/* + * Ocean Kernel - User Memory Access Helpers + * + * Centralized helpers for validating and copying user pointers. 
+ */ + +#ifndef _OCEAN_UACCESS_H +#define _OCEAN_UACCESS_H + +#include + +/* Forward declaration to avoid heavy includes in callers */ +struct process; + +/* + * Validate that [ptr, ptr + len) is a valid user range. + * + * required_vma_flags is a VMA_* bitmask (for example VMA_READ/VMA_WRITE). + * Returns 0 on success or -EFAULT/-EINVAL on failure. + */ +int validate_user_range(const void *ptr, size_t len, u32 required_vma_flags); + +/* Copy from user memory into kernel memory. Returns 0 or negative errno. */ +int copy_from_user(void *dst, const void *src, size_t len); + +/* Copy from kernel memory into user memory. Returns 0 or negative errno. */ +int copy_to_user(void *dst, const void *src, size_t len); + +/* + * Copy a NUL-terminated user string into a kernel buffer. + * On success returns string length (excluding NUL). + * Returns -EFAULT for invalid user memory or -ENAMETOOLONG if no NUL fits. + */ +int copy_string_from_user(char *dst, size_t dst_size, const char *src); + +#endif /* _OCEAN_UACCESS_H */ diff --git a/kernel/include/ocean/vmm.h b/kernel/include/ocean/vmm.h index 2b46ef2..81324dd 100644 --- a/kernel/include/ocean/vmm.h +++ b/kernel/include/ocean/vmm.h @@ -238,6 +238,10 @@ int vmm_mprotect(struct address_space *as, u64 addr, u64 size, u32 prot); /* Handle a page fault */ int vmm_page_fault(u64 fault_addr, u64 error_code); +/* Track current CPU's active address space for fault handling */ +void vmm_set_current(struct address_space *as); +struct address_space *vmm_get_current(void); + /* * Kernel Heap (kmalloc/kfree) */ diff --git a/kernel/ipc/endpoint.c b/kernel/ipc/endpoint.c index bb3e285..128f347 100644 --- a/kernel/ipc/endpoint.c +++ b/kernel/ipc/endpoint.c @@ -51,7 +51,7 @@ struct ipc_endpoint *endpoint_create(struct process *owner, u32 flags) memset(ep, 0, sizeof(*ep)); ep->id = alloc_endpoint_id(); - ep->flags = flags; + ep->flags = flags | EP_FLAG_LISTED; ep->owner = owner; ep->refcount = 1; @@ -80,16 +80,27 @@ void endpoint_destroy(struct 
ipc_endpoint *ep) return; } + bool remove_from_list = false; + spin_lock(&ep->lock); + if (ep->flags & EP_FLAG_DEAD) { + spin_unlock(&ep->lock); + return; + } + /* Mark as dead */ ep->flags |= EP_FLAG_DEAD; + if (ep->flags & EP_FLAG_LISTED) { + ep->flags &= ~EP_FLAG_LISTED; + remove_from_list = true; + } /* Wake all senders with error */ while (!list_empty(&ep->send_queue)) { struct list_head *node = ep->send_queue.next; struct ipc_wait *wait = container_of(node, struct ipc_wait, wait_list); - list_del(node); + list_del_init(node); wait->result = IPC_ERR_DEAD; if (wait->partner) { sched_wakeup(wait->partner); @@ -100,7 +111,7 @@ void endpoint_destroy(struct ipc_endpoint *ep) while (!list_empty(&ep->recv_queue)) { struct list_head *node = ep->recv_queue.next; struct ipc_wait *wait = container_of(node, struct ipc_wait, wait_list); - list_del(node); + list_del_init(node); wait->result = IPC_ERR_DEAD; if (wait->partner) { sched_wakeup(wait->partner); @@ -109,14 +120,21 @@ void endpoint_destroy(struct ipc_endpoint *ep) spin_unlock(&ep->lock); - /* Remove from global list */ - spin_lock(&endpoint_list_lock); - list_del(&ep->list); - spin_unlock(&endpoint_list_lock); + if (remove_from_list) { + spin_lock(&endpoint_list_lock); + if (!list_empty(&ep->list)) { + list_del_init(&ep->list); + } + spin_unlock(&endpoint_list_lock); - kprintf("[ipc] Destroyed endpoint %u\n", ep->id); + /* + * Drop the list/owner reference from endpoint_create(). + * Memory is actually freed when the final holder drops via endpoint_put(). 
+ */ + endpoint_put(ep); + } - kfree(ep); + kprintf("[ipc] Endpoint %u marked dead\n", ep->id); } /* @@ -132,7 +150,7 @@ struct ipc_endpoint *endpoint_get(u32 id) list_for_each(node, &endpoint_list) { struct ipc_endpoint *e = container_of(node, struct ipc_endpoint, list); if (e->id == id && !(e->flags & EP_FLAG_DEAD)) { - e->refcount++; + __atomic_fetch_add(&e->refcount, 1, __ATOMIC_RELAXED); ep = e; break; } @@ -152,13 +170,10 @@ void endpoint_put(struct ipc_endpoint *ep) return; } - spin_lock(&ep->lock); - ep->refcount--; - int should_free = (ep->refcount == 0); - spin_unlock(&ep->lock); - - if (should_free) { - endpoint_destroy(ep); + int refs = __atomic_sub_fetch(&ep->refcount, 1, __ATOMIC_ACQ_REL); + if (refs == 0) { + kprintf("[ipc] Destroyed endpoint %u\n", ep->id); + kfree(ep); } } @@ -251,6 +266,7 @@ void ipc_dump_endpoint(struct ipc_endpoint *ep) if (ep->flags & EP_FLAG_REPLY) kprintf(" REPLY"); if (ep->flags & EP_FLAG_NOTIFICATION) kprintf(" NOTIFICATION"); if (ep->flags & EP_FLAG_DEAD) kprintf(" DEAD"); + if (ep->flags & EP_FLAG_LISTED) kprintf(" LISTED"); kprintf("\n"); kprintf(" Refcount: %d\n", ep->refcount); diff --git a/kernel/ipc/message.c b/kernel/ipc/message.c index 24da4b2..e497b6c 100644 --- a/kernel/ipc/message.c +++ b/kernel/ipc/message.c @@ -15,6 +15,8 @@ extern int kprintf(const char *fmt, ...); extern void *memcpy(void *dest, const void *src, size_t n); extern void *memset(void *s, int c, size_t n); +extern void *kmalloc(size_t size); +extern void kfree(void *ptr); /* Global IPC state */ static u64 ipc_total_messages = 0; @@ -59,33 +61,15 @@ static void copy_message(struct ipc_message *dst, struct ipc_message *src) /* * Find a waiting receiver on the endpoint */ -static struct thread *find_receiver(struct ipc_endpoint *ep) +static struct ipc_wait *peek_waiter(struct list_head *queue) { - if (list_empty(&ep->recv_queue)) { + if (list_empty(queue)) { return NULL; } - struct list_head *node = ep->recv_queue.next; + struct list_head *node = 
queue->next; struct ipc_wait *wait = container_of(node, struct ipc_wait, wait_list); - - /* The thread is embedded in thread_ipc which is part of a larger structure */ - /* For now, we store the thread pointer in wait->partner */ - return wait->partner; -} - -/* - * Find a waiting sender on the endpoint - */ -static struct thread *find_sender(struct ipc_endpoint *ep) -{ - if (list_empty(&ep->send_queue)) { - return NULL; - } - - struct list_head *node = ep->send_queue.next; - struct ipc_wait *wait = container_of(node, struct ipc_wait, wait_list); - - return wait->partner; + return wait; } /* @@ -97,7 +81,6 @@ static struct thread *find_sender(struct ipc_endpoint *ep) int ipc_send(struct ipc_endpoint *ep, struct ipc_message *msg) { struct thread *self = get_current(); - struct thread *receiver; int result = IPC_OK; if (!ep || !msg) { @@ -113,15 +96,14 @@ int ipc_send(struct ipc_endpoint *ep, struct ipc_message *msg) } /* Look for a waiting receiver */ - receiver = find_receiver(ep); + struct ipc_wait *recv_wait = peek_waiter(&ep->recv_queue); - if (receiver) { + if (recv_wait) { /* Direct transfer - receiver is waiting */ - struct ipc_wait *recv_wait = container_of( - ep->recv_queue.next, struct ipc_wait, wait_list); + struct thread *receiver = recv_wait->partner; /* Remove receiver from wait queue */ - list_del(&recv_wait->wait_list); + list_del_init(&recv_wait->wait_list); /* Copy message to receiver's buffer */ if (recv_wait->msg) { @@ -147,26 +129,41 @@ int ipc_send(struct ipc_endpoint *ep, struct ipc_message *msg) return IPC_ERR_NOPARTNER; } else { /* Block and wait for receiver */ - struct ipc_wait wait; - wait.endpoint = ep; - wait.msg = msg; - wait.partner = self; - wait.operation = IPC_OP_SEND; - wait.result = IPC_ERR_NOPARTNER; - INIT_LIST_HEAD(&wait.wait_list); + struct ipc_wait *wait = kmalloc(sizeof(*wait)); + if (!wait) { + spin_unlock(&ep->lock); + return IPC_ERR_BUSY; + } + + wait->endpoint = ep; + wait->msg = msg; + wait->partner = self; + 
wait->operation = IPC_OP_SEND; + wait->result = IPC_ERR_NOPARTNER; + INIT_LIST_HEAD(&wait->wait_list); /* Add to send queue */ - list_add_tail(&wait.wait_list, &ep->send_queue); + list_add_tail(&wait->wait_list, &ep->send_queue); spin_unlock(&ep->lock); /* Sleep until a receiver arrives */ kprintf("[ipc] Send: blocking TID %d\n", self->tid); - thread_sleep(&wait); + thread_sleep(wait); + + /* + * Defensive cleanup for spurious wakeups: remove from queue if still linked. + */ + spin_lock(&ep->lock); + if (!list_empty(&wait->wait_list)) { + list_del_init(&wait->wait_list); + } + spin_unlock(&ep->lock); /* Woken up - check result */ - result = wait.result; + result = wait->result; kprintf("[ipc] Send: TID %d woke, result=%d\n", self->tid, result); + kfree(wait); } return result; @@ -181,7 +178,6 @@ int ipc_send(struct ipc_endpoint *ep, struct ipc_message *msg) int ipc_recv(struct ipc_endpoint *ep, struct ipc_message *msg) { struct thread *self = get_current(); - struct thread *sender; int result = IPC_OK; if (!ep || !msg) { @@ -197,15 +193,14 @@ int ipc_recv(struct ipc_endpoint *ep, struct ipc_message *msg) } /* Look for a waiting sender */ - sender = find_sender(ep); + struct ipc_wait *send_wait = peek_waiter(&ep->send_queue); - if (sender) { + if (send_wait) { /* Direct transfer - sender is waiting */ - struct ipc_wait *send_wait = container_of( - ep->send_queue.next, struct ipc_wait, wait_list); + struct thread *sender = send_wait->partner; /* Remove sender from wait queue */ - list_del(&send_wait->wait_list); + list_del_init(&send_wait->wait_list); /* Copy message from sender's buffer */ if (send_wait->msg) { @@ -230,26 +225,39 @@ int ipc_recv(struct ipc_endpoint *ep, struct ipc_message *msg) return IPC_ERR_NOPARTNER; } else { /* Block and wait for sender */ - struct ipc_wait wait; - wait.endpoint = ep; - wait.msg = msg; - wait.partner = self; - wait.operation = IPC_OP_RECV; - wait.result = IPC_ERR_NOPARTNER; - INIT_LIST_HEAD(&wait.wait_list); + struct ipc_wait 
*wait = kmalloc(sizeof(*wait)); + if (!wait) { + spin_unlock(&ep->lock); + return IPC_ERR_BUSY; + } + + wait->endpoint = ep; + wait->msg = msg; + wait->partner = self; + wait->operation = IPC_OP_RECV; + wait->result = IPC_ERR_NOPARTNER; + INIT_LIST_HEAD(&wait->wait_list); /* Add to receive queue */ - list_add_tail(&wait.wait_list, &ep->recv_queue); + list_add_tail(&wait->wait_list, &ep->recv_queue); spin_unlock(&ep->lock); /* Sleep until a sender arrives */ kprintf("[ipc] Recv: blocking TID %d\n", self->tid); - thread_sleep(&wait); + thread_sleep(wait); + + /* Defensive cleanup for spurious wakeups. */ + spin_lock(&ep->lock); + if (!list_empty(&wait->wait_list)) { + list_del_init(&wait->wait_list); + } + spin_unlock(&ep->lock); /* Woken up - check result */ - result = wait.result; + result = wait->result; kprintf("[ipc] Recv: TID %d woke, result=%d\n", self->tid, result); + kfree(wait); } return result; diff --git a/kernel/mm/fault.c b/kernel/mm/fault.c index ae63a53..ff12011 100644 --- a/kernel/mm/fault.c +++ b/kernel/mm/fault.c @@ -132,8 +132,9 @@ static int handle_stack_growth(struct address_space *as, u64 fault_addr, } } + u64 added_pages = (vma->start - new_start) / PAGE_SIZE; vma->start = new_start; - as->total_vm += (vma->start - new_start) / PAGE_SIZE; + as->total_vm += added_pages; return 0; } diff --git a/kernel/mm/slab.c b/kernel/mm/slab.c index 7057385..8b41a23 100644 --- a/kernel/mm/slab.c +++ b/kernel/mm/slab.c @@ -52,6 +52,14 @@ static spinlock_t cache_list_lock; static struct slab_cache *kmalloc_caches[KMALLOC_NUM_CACHES]; static bool slab_initialized = false; +static inline struct page *virt_to_page_meta(void *addr) +{ + if (!addr) { + return NULL; + } + return phys_to_page(virt_to_phys(addr)); +} + /* * Get the slab structure from an object pointer */ @@ -119,6 +127,11 @@ static struct slab *slab_alloc_new(struct slab_cache *cache) } *freelist = NULL; + struct page *meta = virt_to_page_meta(page); + if (meta) { + page_set_flag(meta, PG_SLAB); + } 
+ cache->total_slabs++; return slab; @@ -129,6 +142,10 @@ static struct slab *slab_alloc_new(struct slab_cache *cache) */ static void slab_free_slab(struct slab *slab) { + struct page *meta = virt_to_page_meta(slab); + if (meta) { + page_clear_flag(meta, PG_SLAB); + } slab->cache->total_slabs--; free_page(slab); } @@ -428,27 +445,29 @@ void kfree(void *ptr) { if (!ptr) return; - /* Check if this is a page-aligned large allocation */ - if (((u64)ptr & (PAGE_SIZE - 1)) == 0) { - /* Could be a large allocation or a slab page */ - struct slab *slab = (struct slab *)ptr; + struct page *meta = virt_to_page_meta(ptr); + if (!meta) { + return; + } - /* Check if it looks like a slab */ - if (slab->cache && slab->start) { - /* This is a slab - the object must be within it */ + if (meta->flags & PG_SLAB) { + struct slab *slab = obj_to_slab(ptr); + if (slab->cache) { slab_free(slab->cache, ptr); - return; } + return; + } - /* Must be a large allocation */ - free_page(ptr); + /* Large/page allocations are always page-aligned. 
*/ + if (((u64)ptr & (PAGE_SIZE - 1)) != 0) { + kprintf("kfree: non-slab pointer %p is not page-aligned\n", ptr); return; } - /* Normal slab allocation */ - struct slab *slab = obj_to_slab(ptr); - if (slab->cache) { - slab_free(slab->cache, ptr); + if ((meta->flags & PG_HEAD) && (meta->flags & PG_COMPOUND)) { + free_pages(ptr, meta->order); + } else { + free_page(ptr); } } @@ -472,12 +491,17 @@ size_t ksize(void *ptr) { if (!ptr) return 0; - struct slab *slab = obj_to_slab(ptr); - if (slab->cache) { + struct page *meta = virt_to_page_meta(ptr); + if (meta && (meta->flags & PG_SLAB)) { + struct slab *slab = obj_to_slab(ptr); return slab->cache->obj_size; } - return PAGE_SIZE; /* Assume at least one page */ + if (meta && (meta->flags & PG_HEAD) && (meta->flags & PG_COMPOUND)) { + return (size_t)(1UL << meta->order) * PAGE_SIZE; + } + + return PAGE_SIZE; } /* diff --git a/kernel/mm/uaccess.c b/kernel/mm/uaccess.c new file mode 100644 index 0000000..72b3d0c --- /dev/null +++ b/kernel/mm/uaccess.c @@ -0,0 +1,137 @@ +/* + * Ocean Kernel - User Memory Access Helpers + * + * Centralized user pointer validation and copy helpers. 
+ */ + +#include +#include +#include +#include + +/* External functions */ +extern void *memcpy(void *dest, const void *src, size_t n); + +int validate_user_range(const void *ptr, size_t len, u32 required_vma_flags) +{ + if (!ptr) { + return -EFAULT; + } + if (len == 0) { + return 0; + } + + u64 start = (u64)(uintptr_t)ptr; + if (start > USER_SPACE_END) { + return -EFAULT; + } + + if ((len - 1) > (USER_SPACE_END - start)) { + return -EFAULT; + } + + u64 end = start + len; + + struct process *proc = get_current_process(); + if (!proc || !proc->mm) { + return -EFAULT; + } + + struct address_space *as = proc->mm; + u64 flags; + spin_lock_irqsave(&as->lock, &flags); + + u64 cursor = start; + while (cursor < end) { + struct vm_area *vma = vmm_find_vma(as, cursor); + if (!vma || cursor < vma->start) { + spin_unlock_irqrestore(&as->lock, flags); + return -EFAULT; + } + + if ((required_vma_flags & VMA_READ) && !(vma->flags & VMA_READ)) { + spin_unlock_irqrestore(&as->lock, flags); + return -EFAULT; + } + if ((required_vma_flags & VMA_WRITE) && !(vma->flags & VMA_WRITE)) { + spin_unlock_irqrestore(&as->lock, flags); + return -EFAULT; + } + if ((required_vma_flags & VMA_EXEC) && !(vma->flags & VMA_EXEC)) { + spin_unlock_irqrestore(&as->lock, flags); + return -EFAULT; + } + + if (vma->end <= cursor) { + spin_unlock_irqrestore(&as->lock, flags); + return -EFAULT; + } + + if (vma->end >= end) { + break; + } + + cursor = vma->end; + } + + spin_unlock_irqrestore(&as->lock, flags); + return 0; +} + +int copy_from_user(void *dst, const void *src, size_t len) +{ + if (!dst) { + return -EINVAL; + } + if (len == 0) { + return 0; + } + + int ret = validate_user_range(src, len, VMA_READ); + if (ret < 0) { + return ret; + } + + memcpy(dst, src, len); + return 0; +} + +int copy_to_user(void *dst, const void *src, size_t len) +{ + if (!src) { + return -EINVAL; + } + if (len == 0) { + return 0; + } + + int ret = validate_user_range(dst, len, VMA_WRITE); + if (ret < 0) { + return ret; + } + 
+ memcpy(dst, src, len); + return 0; +} + +int copy_string_from_user(char *dst, size_t dst_size, const char *src) +{ + if (!dst || !src || dst_size == 0) { + return -EINVAL; + } + + for (size_t i = 0; i < dst_size; i++) { + int ret = validate_user_range((const void *)(src + i), 1, VMA_READ); + if (ret < 0) { + return ret; + } + + dst[i] = *((volatile const char *)(src + i)); + if (dst[i] == '\0') { + return (int)i; + } + } + + dst[dst_size - 1] = '\0'; + return -ENAMETOOLONG; +} diff --git a/kernel/mm/vmm.c b/kernel/mm/vmm.c index 0a6fa6e..c51b73a 100644 --- a/kernel/mm/vmm.c +++ b/kernel/mm/vmm.c @@ -315,6 +315,7 @@ int vmm_map_region(struct address_space *as, u64 start, u64 size, u32 flags) int vmm_unmap_region(struct address_space *as, u64 start, u64 size) { u64 end = start + size; + u64 unmapped_pages = 0; /* Find and remove affected VMAs */ struct vm_area *vma, *tmp; @@ -338,6 +339,7 @@ int vmm_unmap_region(struct address_space *as, u64 start, u64 size) phys_addr_t phys = *pte & PTE_ADDR_MASK; const struct boot_info *boot = get_boot_info(); free_page((void *)(phys + boot->hhdm_offset)); + unmapped_pages++; } paging_unmap(as->pml4, addr); } @@ -369,7 +371,11 @@ int vmm_unmap_region(struct address_space *as, u64 start, u64 size) } } - as->total_vm -= size / PAGE_SIZE; + if (as->total_vm >= unmapped_pages) { + as->total_vm -= unmapped_pages; + } else { + as->total_vm = 0; + } return 0; } @@ -529,7 +535,9 @@ struct address_space *vmm_clone_address_space(struct address_space *src) void *new_page = get_free_page(); if (!new_page) { kprintf("[vmm] Failed to allocate page for fork\n"); - continue; + vma_free(new_vma); + vmm_destroy_address_space(dst); + return NULL; } /* Convert to physical address */ @@ -544,6 +552,10 @@ struct address_space *vmm_clone_address_space(struct address_space *src) int ret = paging_map(dst->pml4, addr, new_phys, flags); if (ret != 0) { kprintf("[vmm] Failed to map page at 0x%llx\n", addr); + free_page(new_page); + vma_free(new_vma); + 
vmm_destroy_address_space(dst); + return NULL; } } diff --git a/kernel/proc/process.c b/kernel/proc/process.c index fa9e900..f4302b7 100644 --- a/kernel/proc/process.c +++ b/kernel/proc/process.c @@ -26,6 +26,10 @@ extern void ret_from_fork(void); static LIST_HEAD(process_list); static spinlock_t process_list_lock; +/* Global thread list for channel-based wakeups */ +struct list_head all_threads = LIST_HEAD_INIT(all_threads); +spinlock_t thread_list_lock; + /* PID allocation bitmap */ static u64 pid_bitmap[PID_MAX / 64]; static spinlock_t pid_lock; @@ -34,6 +38,56 @@ static pid_t next_pid = 1; /* Init process (PID 1) */ struct process *init_process = NULL; +/* Forward declarations */ +static void free_kernel_stack(void *stack); + +static void thread_global_add(struct thread *t) +{ + u64 flags; + spin_lock_irqsave(&thread_list_lock, &flags); + list_add_tail(&t->all_list, &all_threads); + spin_unlock_irqrestore(&thread_list_lock, flags); +} + +static void thread_global_remove(struct thread *t) +{ + u64 flags; + spin_lock_irqsave(&thread_list_lock, &flags); + if (!list_empty(&t->all_list)) { + list_del_init(&t->all_list); + } + spin_unlock_irqrestore(&thread_list_lock, flags); +} + +static void process_reap(struct process *child) +{ + if (!child) { + return; + } + + if (child->main_thread) { + thread_global_remove(child->main_thread); + free_kernel_stack(child->main_thread->kernel_stack); + kfree(child->main_thread); + child->main_thread = NULL; + } + + if (child->mm) { + vmm_destroy_address_space(child->mm); + child->mm = NULL; + } + + u64 flags; + spin_lock_irqsave(&process_list_lock, &flags); + if (!list_empty(&child->proc_list)) { + list_del_init(&child->proc_list); + } + spin_unlock_irqrestore(&process_list_lock, flags); + + free_pid(child->pid); + kfree(child); +} + /* * Allocate a new PID */ @@ -129,8 +183,10 @@ void process_init(void) kprintf("Initializing process subsystem...\n"); spin_init(&process_list_lock); + spin_init(&thread_list_lock); 
spin_init(&pid_lock); memset(pid_bitmap, 0, sizeof(pid_bitmap)); + INIT_LIST_HEAD(&all_threads); /* Reserve PID 0 for kernel/idle */ pid_bitmap[0] |= 1; @@ -216,6 +272,25 @@ static void user_thread_start(void) } } +/* + * Kernel thread trampoline. + * + * The target function/argument are stored in r12/r13 in the saved context. + */ +static void kthread_entry(void) +{ + struct thread *t = current_thread; + int (*fn)(void *) = (int (*)(void *))t->context.r12; + void *arg = (void *)t->context.r13; + + int rc = 0; + if (fn) { + rc = fn(arg); + } + + thread_exit(rc); +} + /* * Create the main thread for a process */ @@ -275,6 +350,7 @@ struct thread *process_create_main_thread(struct process *proc, u64 entry, u64 s INIT_LIST_HEAD(&t->run_list); INIT_LIST_HEAD(&t->thread_list); INIT_LIST_HEAD(&t->wait_list); + INIT_LIST_HEAD(&t->all_list); /* CPU affinity - allow all CPUs */ t->cpu = 0; @@ -291,6 +367,8 @@ struct thread *process_create_main_thread(struct process *proc, u64 entry, u64 s proc->main_thread = t; spin_unlock_irqrestore(&proc->lock, flags); + thread_global_add(t); + return t; } @@ -311,7 +389,12 @@ struct thread *kthread_create(int (*fn)(void *), void *arg, const char *name) /* Create the thread */ struct thread *t = kmalloc(sizeof(struct thread)); if (!t) { - /* TODO: cleanup proc */ + u64 flags; + spin_lock_irqsave(&process_list_lock, &flags); + list_del_init(&proc->proc_list); + spin_unlock_irqrestore(&process_list_lock, flags); + free_pid(proc->pid); + kfree(proc); return NULL; } @@ -332,6 +415,12 @@ struct thread *kthread_create(int (*fn)(void *), void *arg, const char *name) t->kernel_stack = alloc_kernel_stack(); if (!t->kernel_stack) { kfree(t); + u64 flags; + spin_lock_irqsave(&process_list_lock, &flags); + list_del_init(&proc->proc_list); + spin_unlock_irqrestore(&process_list_lock, flags); + free_pid(proc->pid); + kfree(proc); return NULL; } t->kernel_stack_size = KERNEL_STACK_SIZE; @@ -349,19 +438,15 @@ struct thread *kthread_create(int (*fn)(void 
*), void *arg, const char *name) * loaded by switch_context. */ t->context.rsp = kstack_top - 8; - t->context.rip = (u64)fn; /* Will "return" here */ + t->context.rip = (u64)kthread_entry; t->context.rbp = 0; - - /* Store arg in a callee-saved register that gets restored */ - /* We'll pass arg via r12, and the thread function gets it from there */ - /* Actually, for kernel threads, we need a wrapper. For now, simple approach: */ - /* The function will be called directly. arg is not easily passed. */ - /* TODO: Implement proper kthread entry wrapper */ - (void)arg; + t->context.r12 = (u64)fn; + t->context.r13 = (u64)arg; INIT_LIST_HEAD(&t->run_list); INIT_LIST_HEAD(&t->thread_list); INIT_LIST_HEAD(&t->wait_list); + INIT_LIST_HEAD(&t->all_list); t->cpu = 0; t->cpu_mask = ~0ULL; @@ -375,6 +460,8 @@ struct thread *kthread_create(int (*fn)(void *), void *arg, const char *name) proc->main_thread = t; spin_unlock_irqrestore(&proc->lock, flags); + thread_global_add(t); + return t; } @@ -460,10 +547,12 @@ void thread_exit(int code) /* Remove from process */ u64 flags; spin_lock_irqsave(&proc->lock, &flags); - list_del(&t->thread_list); + list_del_init(&t->thread_list); proc->nr_threads--; spin_unlock_irqrestore(&proc->lock, flags); + thread_global_remove(t); + /* If last thread, process exits too - become zombie and wake parent */ if (proc->nr_threads == 0) { proc->exit_code = code; @@ -471,11 +560,8 @@ void thread_exit(int code) /* Wake up parent if it's waiting */ struct process *parent = proc->parent; - if (parent && parent->main_thread) { - if (parent->main_thread->state == TASK_INTERRUPTIBLE) { - parent->main_thread->state = TASK_RUNNING; - sched_add(parent->main_thread); - } + if (parent) { + thread_wakeup(parent); } } else { t->state = TASK_DEAD; @@ -505,6 +591,25 @@ void process_exit(int code) proc->exit_code = code; + /* + * Reparent children to init so someone can always reap them. 
+ */ + if (init_process && init_process != proc) { + u64 flags, init_flags; + spin_lock_irqsave(&proc->lock, &flags); + while (!list_empty(&proc->children)) { + struct process *child = list_first_entry(&proc->children, + struct process, sibling); + list_del_init(&child->sibling); + child->parent = init_process; + + spin_lock_irqsave(&init_process->lock, &init_flags); + list_add_tail(&child->sibling, &init_process->children); + spin_unlock_irqrestore(&init_process->lock, init_flags); + } + spin_unlock_irqrestore(&proc->lock, flags); + } + /* TODO: * - Terminate all threads * - Close all file descriptors @@ -553,7 +658,12 @@ pid_t process_fork(void) if (parent->mm) { child->mm = vmm_clone_address_space(parent->mm); if (!child->mm) { - /* TODO: cleanup child */ + spin_lock_irqsave(&parent->lock, &flags); + if (!list_empty(&child->sibling)) { + list_del_init(&child->sibling); + } + spin_unlock_irqrestore(&parent->lock, flags); + process_reap(child); return -1; } } @@ -561,7 +671,12 @@ pid_t process_fork(void) /* Create child's main thread as copy of parent thread */ struct thread *child_thread = kmalloc(sizeof(struct thread)); if (!child_thread) { - /* TODO: cleanup */ + spin_lock_irqsave(&parent->lock, &flags); + if (!list_empty(&child->sibling)) { + list_del_init(&child->sibling); + } + spin_unlock_irqrestore(&parent->lock, flags); + process_reap(child); return -1; } @@ -572,6 +687,12 @@ pid_t process_fork(void) child_thread->kernel_stack = alloc_kernel_stack(); if (!child_thread->kernel_stack) { kfree(child_thread); + spin_lock_irqsave(&parent->lock, &flags); + if (!list_empty(&child->sibling)) { + list_del_init(&child->sibling); + } + spin_unlock_irqrestore(&parent->lock, flags); + process_reap(child); return -1; } @@ -595,6 +716,7 @@ pid_t process_fork(void) INIT_LIST_HEAD(&child_thread->run_list); INIT_LIST_HEAD(&child_thread->thread_list); INIT_LIST_HEAD(&child_thread->wait_list); + INIT_LIST_HEAD(&child_thread->all_list); /* Set up context so child returns from 
fork with 0 */ /* The context's RIP should point to ret_from_fork */ @@ -646,6 +768,8 @@ pid_t process_fork(void) child->main_thread = child_thread; spin_unlock_irqrestore(&child->lock, flags); + thread_global_add(child_thread); + /* Add child thread to scheduler */ child_thread->flags &= ~TF_FORKING; sched_add(child_thread); @@ -660,8 +784,7 @@ pid_t process_fork(void) pid_t process_wait(int *status) { struct process *proc = get_current_process(); - struct thread *t = current_thread; - if (!proc || !t) { + if (!proc || !current_thread) { return -1; } @@ -687,24 +810,21 @@ pid_t process_wait(int *status) } /* Remove from children list */ - list_del(&child->sibling); + list_del_init(&child->sibling); spin_unlock_irqrestore(&proc->lock, flags); - /* Free child resources */ - free_pid(pid); + process_reap(child); return pid; } } - /* No zombie children - block and wait */ - t->state = TASK_INTERRUPTIBLE; spin_unlock_irqrestore(&proc->lock, flags); } - /* Schedule away - child's exit will wake us */ - schedule(); + /* No zombie children - block until a child exits. */ + thread_sleep(proc); /* We were woken up - check again for zombies */ goto retry; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c505879..ad27e25 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -267,6 +267,12 @@ void schedule(void) struct thread *next; u64 flags; + if (!prev) { + prev = rq->idle; + current_thread = prev; + rq->curr = prev; + } + preempt_disable(); spin_lock_irqsave(&rq->lock, &flags); @@ -457,21 +463,23 @@ void thread_sleep(void *channel) void thread_wakeup(void *channel) { - /* Wake all threads sleeping on this channel */ - /* This is a simple implementation that scans all run queues */ - /* TODO: Use a hash table for efficiency */ - - struct run_queue *rq = this_rq(); - if (!rq) return; - - /* For now, just look at threads in the system */ - /* In a real system we'd track sleeping threads separately */ + /* Wake all threads sleeping on this channel. 
*/ extern struct list_head all_threads; extern spinlock_t thread_list_lock; - /* We can't easily iterate all threads without the list */ - /* For now, this is a no-op - sched_wakeup handles direct wakeups */ - (void)channel; + u64 flags; + spin_lock_irqsave(&thread_list_lock, &flags); + + struct thread *t; + list_for_each_entry(t, &all_threads, all_list) { + if (t->wait_channel == channel && + (t->state == TASK_INTERRUPTIBLE || + t->state == TASK_UNINTERRUPTIBLE)) { + sched_wakeup(t); + } + } + + spin_unlock_irqrestore(&thread_list_lock, flags); } /* diff --git a/kernel/syscall/dispatch.c b/kernel/syscall/dispatch.c index 7d85d0f..80542be 100644 --- a/kernel/syscall/dispatch.c +++ b/kernel/syscall/dispatch.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -15,8 +16,6 @@ /* External functions */ extern int kprintf(const char *fmt, ...); extern void *memset(void *s, int c, size_t n); -extern size_t strlen(const char *s); -extern int strcmp(const char *s1, const char *s2); extern char *strstr(const char *haystack, const char *needle); /* Assembly entry point */ @@ -86,13 +85,32 @@ static i64 sys_yield(void) /* SYS_DEBUG_PRINT - Debug print (for testing) */ static i64 sys_debug_print(const char *msg, u64 len) { - /* TODO: Validate user pointer */ - /* For now, just print directly (UNSAFE - for testing only) */ - for (u64 i = 0; i < len && i < 256; i++) { - extern void serial_putc(char c); - serial_putc(msg[i]); + if (len == 0) { + return 0; } - return (i64)len; + if (!msg) { + return -EFAULT; + } + + extern void serial_putc(char c); + + char chunk[128]; + u64 total = 0; + while (total < len) { + size_t n = (size_t)MIN((u64)sizeof(chunk), len - total); + int ret = copy_from_user(chunk, msg + total, n); + if (ret < 0) { + return (total > 0) ? 
(i64)total : ret; + } + + for (size_t i = 0; i < n; i++) { + serial_putc(chunk[i]); + } + + total += n; + } + + return (i64)total; } /* SYS_READ - Read from file descriptor (minimal implementation) */ @@ -100,7 +118,13 @@ static i64 sys_read(int fd, char *buf, u64 count) { /* For now, only support stdin (fd 0) */ if (fd != 0) { - return -1; /* EBADF */ + return -EBADF; + } + if (count == 0) { + return 0; + } + if (!buf) { + return -EFAULT; } extern int serial_getc(void); @@ -123,19 +147,30 @@ static i64 sys_read(int fd, char *buf, u64 count) break; } - buf[i++] = (char)c; + char out = (char)c; /* Echo the character */ - serial_putc((char)c); + serial_putc(out); /* Stop at newline */ if (c == '\n' || c == '\r') { if (c == '\r') { - buf[i-1] = '\n'; + out = '\n'; serial_putc('\n'); } + int ret = copy_to_user(buf + i, &out, 1); + if (ret < 0) { + return (i > 0) ? (i64)i : ret; + } + i++; break; } + + int ret = copy_to_user(buf + i, &out, 1); + if (ret < 0) { + return (i > 0) ? (i64)i : ret; + } + i++; } return (i64)i; @@ -146,16 +181,34 @@ static i64 sys_write(int fd, const char *buf, u64 count) { /* For now, only support stdout (fd 1) and stderr (fd 2) */ if (fd != 1 && fd != 2) { - return -1; /* EBADF */ + return -EBADF; + } + if (count == 0) { + return 0; + } + if (!buf) { + return -EFAULT; } - /* TODO: Validate user buffer pointer */ - for (u64 i = 0; i < count; i++) { - extern void serial_putc(char c); - serial_putc(buf[i]); + extern void serial_putc(char c); + + char chunk[128]; + u64 total = 0; + while (total < count) { + size_t n = (size_t)MIN((u64)sizeof(chunk), count - total); + int ret = copy_from_user(chunk, buf + total, n); + if (ret < 0) { + return (total > 0) ? 
(i64)total : ret; + } + + for (size_t i = 0; i < n; i++) { + serial_putc(chunk[i]); + } + + total += n; } - return (i64)count; + return (i64)total; } /* @@ -190,19 +243,25 @@ static i64 sys_exec(const char *path, char *const argv[], char *const envp[]) (void)envp; if (!path) { - return -1; /* EINVAL */ + return -EINVAL; + } + + char kpath[256]; + int path_len = copy_string_from_user(kpath, sizeof(kpath), path); + if (path_len < 0) { + return path_len; } /* Find the module in boot modules */ - struct cached_module *mod = find_boot_module(path); + struct cached_module *mod = find_boot_module(kpath); if (!mod) { - kprintf("exec: '%s' not found\n", path); - return -1; /* ENOENT */ + kprintf("exec: '%s' not found\n", kpath); + return -ENOENT; } /* Extract just the filename from path */ - const char *name = path; - const char *p = path; + const char *name = kpath; + const char *p = kpath; while (*p) { if (*p == '/') { name = p + 1; @@ -215,13 +274,26 @@ static i64 sys_exec(const char *path, char *const argv[], char *const envp[]) exec_replace(mod->address, mod->size, name); /* exec_replace never returns on success */ - return -1; + return -EIO; } /* SYS_WAIT - Wait for child process */ static i64 sys_wait(int *status) { - return (i64)process_wait(status); + int kstatus = 0; + pid_t pid = process_wait(status ? 
&kstatus : NULL); + if (pid < 0) { + return (i64)pid; + } + + if (status) { + int ret = copy_to_user(status, &kstatus, sizeof(kstatus)); + if (ret < 0) { + return ret; + } + } + + return (i64)pid; } /* @@ -244,13 +316,28 @@ static i64 sys_ipc_recv_impl(u32 ep_cap, u64 tag_ptr, u64 r1_ptr, u64 r2_ptr, u6 int result = ipc_recv_fast(ep_cap, &tag, regs); - /* Copy results back to user pointers (TODO: validate pointers) */ + /* Copy results back to user pointers */ if (result == IPC_OK && tag_ptr) { - *(u64 *)tag_ptr = tag; - if (r1_ptr) *(u64 *)r1_ptr = regs[0]; - if (r2_ptr) *(u64 *)r2_ptr = regs[1]; - if (r3_ptr) *(u64 *)r3_ptr = regs[2]; - if (r4_ptr) *(u64 *)r4_ptr = regs[3]; + int ret = copy_to_user((void *)tag_ptr, &tag, sizeof(tag)); + if (ret < 0) { + return ret; + } + if (r1_ptr) { + ret = copy_to_user((void *)r1_ptr, ®s[0], sizeof(regs[0])); + if (ret < 0) return ret; + } + if (r2_ptr) { + ret = copy_to_user((void *)r2_ptr, ®s[1], sizeof(regs[1])); + if (ret < 0) return ret; + } + if (r3_ptr) { + ret = copy_to_user((void *)r3_ptr, ®s[2], sizeof(regs[2])); + if (ret < 0) return ret; + } + if (r4_ptr) { + ret = copy_to_user((void *)r4_ptr, ®s[3], sizeof(regs[3])); + if (ret < 0) return ret; + } } return (i64)result; @@ -281,6 +368,7 @@ static i64 sys_endpoint_destroy_impl(u32 ep_id) } endpoint_destroy(ep); + endpoint_put(ep); return 0; } @@ -356,14 +444,14 @@ i64 syscall_dispatch(u64 nr, u64 arg1, u64 arg2, u64 arg3, /* Validate syscall number */ if (nr >= NR_SYSCALLS) { kprintf("[syscall] Invalid syscall number: %llu\n", nr); - return -1; /* ENOSYS */ + return -ENOSYS; } /* Get handler */ syscall_handler_t handler = syscall_table[nr]; if (!handler) { kprintf("[syscall] Unimplemented syscall: %llu\n", nr); - return -1; /* ENOSYS */ + return -ENOSYS; } /* Call handler */ From e9e1b286ebc12a7528add5a8dbf6b3227772eb88 Mon Sep 17 00:00:00 2001 From: Jack Gaffney Date: Sun, 15 Feb 2026 01:17:05 -0500 Subject: [PATCH 3/4] M7: add deterministic smoke/stress 
validation, CI, and architecture docs --- .github/workflows/ci.yml | 39 +++++++++++++++++++++ Makefile | 17 +++++++++ README.md | 20 +++++++++-- docs/ARCH_V2.md | 39 +++++++++++++++++++++ docs/AUDIT_2026-02.md | 44 +++++++++++++++++++++++ docs/CODEX_WORKFLOW.md | 76 ++++++++++++++++++++++++++++++++++++++++ docs/STATUS.md | 38 ++++++++++++++++++++ scripts/qemu_smoke.sh | 57 ++++++++++++++++++++++++++++++ scripts/qemu_stress.sh | 16 +++++++++ 9 files changed, 344 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 docs/ARCH_V2.md create mode 100644 docs/AUDIT_2026-02.md create mode 100644 docs/CODEX_WORKFLOW.md create mode 100644 docs/STATUS.md create mode 100755 scripts/qemu_smoke.sh create mode 100755 scripts/qemu_stress.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0b1c0a7 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,39 @@ +name: CI + +on: + push: + branches: + - main + - codex/** + pull_request: + +jobs: + build-and-smoke: + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install toolchain + run: | + sudo apt-get update + sudo apt-get install -y \ + gcc-x86-64-linux-gnu \ + nasm \ + qemu-system-x86 \ + xorriso + + - name: Build + run: make all-user -j"$(nproc)" + + - name: Smoke + run: TIMEOUT_SECONDS=90 make smoke + + - name: Upload QEMU logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: qemu-logs + path: build/**/*.log + if-no-files-found: ignore diff --git a/Makefile b/Makefile index de53395..1c11678 100644 --- a/Makefile +++ b/Makefile @@ -253,6 +253,20 @@ compile_commands: @echo "" >> compile_commands.json @echo "]" >> compile_commands.json +.PHONY: smoke +smoke: $(ISO) + @echo "Running QEMU smoke checks..." + @./scripts/qemu_smoke.sh + +.PHONY: stress +stress: $(ISO) + @echo "Running QEMU stress checks..." 
+ @ITERATIONS=$${ITERATIONS:-5} ./scripts/qemu_stress.sh + +.PHONY: check +check: all-user smoke + @echo "Validation check passed" + # Help .PHONY: help help: @@ -265,6 +279,9 @@ help: @echo " run-window Run with serial on PTY (use screen to connect)" @echo " debug Run in QEMU with GDB server" @echo " run-kernel Run kernel directly (no ISO)" + @echo " smoke Run deterministic QEMU smoke checks" + @echo " stress Run repeated QEMU smoke checks" + @echo " check Build and run smoke checks" @echo " clean Remove build artifacts" @echo " limine Download Limine bootloader" @echo " info Show build configuration" diff --git a/README.md b/README.md index 713cd63..98fbe61 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,9 @@ sudo apt install gcc-x86-64-linux-gnu nasm xorriso qemu-system-x86 # Build bootable ISO make +# Build kernel + userspace + ISO explicitly +make all-user + # Run in QEMU make run @@ -68,6 +71,11 @@ make clean # Show build configuration make info + +# Run deterministic validation gates +make smoke +make stress +make check ``` ## Project Structure @@ -144,13 +152,21 @@ Ocean includes an interactive shell with the following built-in commands: External commands (like `ls`) are loaded from boot modules and executed via fork/exec. +## Docs + +- [docs/STATUS.md](docs/STATUS.md) - Current snapshot and roadmap +- [docs/CODEX_WORKFLOW.md](docs/CODEX_WORKFLOW.md) - Codex development workflow + ## Development ### Running Tests ```bash -# Run in QEMU with serial output -make run +# Build and run deterministic smoke test +make check + +# Run stress loop (repeated boots) +make stress # Debug with GDB make debug diff --git a/docs/ARCH_V2.md b/docs/ARCH_V2.md new file mode 100644 index 0000000..ca995ff --- /dev/null +++ b/docs/ARCH_V2.md @@ -0,0 +1,39 @@ +# Ocean Architecture v2 (In-Progress) + +## Goals +- Keep kernel minimal while hardening safety boundaries. +- Move from ad hoc pointer handling to explicit validated user-memory access. 
+- Establish deterministic validation gates for every kernel/userspace change. + +## Kernel Boundaries +1. Syscall boundary +- All user pointers must flow through `uaccess` helpers. +- Syscall handlers return negative errno-style values for failure. + +2. IPC boundary +- Endpoints are refcounted objects with explicit dead/listed state. +- Endpoint destruction is asynchronous with respect to outstanding references. +- Wait queue nodes are allocation-backed and not stack-persistent across sleep. + +3. Scheduler boundary +- Channel sleep/wakeup is implemented via global thread registry. +- Sleepers are woken by channel identity and transitioned through scheduler APIs only. + +4. Process boundary +- Parent/child wait semantics include real reaping and resource teardown. +- Children are reparented to init on parent exit. + +5. Memory boundary +- VMM page accounting tracks actual mapping/unmapping. +- Slab and page allocator interaction uses page flags (`PG_SLAB`, compound head/order) to free correctly. + +## Validation Contract +- `make all-user`: compile kernel + userspace + ISO. +- `make smoke`: deterministic boot + init signatures, panic/fault signature scan. +- `make stress`: repeated smoke cycles. +- CI (`.github/workflows/ci.yml`) runs build + smoke and archives serial logs. + +## Current Non-Goals +- Full SMP correctness and per-CPU scheduler isolation. +- Full capability transfer/cspace enforcement. +- Fully wired production-grade VFS/block/filesystem pipeline. diff --git a/docs/AUDIT_2026-02.md b/docs/AUDIT_2026-02.md new file mode 100644 index 0000000..420c010 --- /dev/null +++ b/docs/AUDIT_2026-02.md @@ -0,0 +1,44 @@ +# Ocean Deep Audit (February 2026) + +## Scope +- Full repository static audit across kernel, userspace servers, drivers, and shared headers. +- Correctness-first remediation of critical kernel defects that could crash or corrupt runtime state. +- Validation tooling expansion to support repeatable build + smoke + stress gates. 
+ +## Critical Findings and Fixes +1. User pointer safety gaps in syscall handlers +- Risk: direct dereference of user pointers in syscall paths (`read`, `write`, `debug_print`, `exec`, `wait`, IPC receive output). +- Fix: introduced centralized user access layer in `kernel/mm/uaccess.c` and `kernel/include/ocean/uaccess.h`; syscall handlers now use `copy_from_user`, `copy_to_user`, and `copy_string_from_user`. + +2. IPC endpoint lifetime/use-after-free race +- Risk: `endpoint_destroy()` freed endpoints while references could still exist. +- Fix: converted to two-phase teardown: mark dead + remove from global list, free only at final `endpoint_put()` refcount drop. + +3. IPC wait object lifetime hazards +- Risk: stack-backed wait objects were queued across scheduler handoffs. +- Fix: wait objects now allocate on heap and are defensively removed on wakeup paths to avoid dangling queue links. + +4. Channel wakeups were effectively non-functional +- Risk: `thread_wakeup()` was a no-op, breaking generic sleep/wake flows. +- Fix: introduced global thread registry and real channel scanning wakeup in scheduler. + +5. Process lifecycle leaks and zombie reaping gaps +- Risk: waited children were not fully reclaimed; failed fork paths leaked process objects. +- Fix: added `process_reap()` for full child cleanup, wired into `process_wait()`, and hardened fork failure cleanup. + +6. Kernel thread entry argument bug +- Risk: kernel thread creation ignored function arguments. +- Fix: added `kthread_entry` trampoline passing function/arg via saved callee registers. + +7. Memory accounting and allocator correctness bugs +- Risk: `vmm_unmap_region()` decremented `total_vm` by requested size instead of actual unmapped pages; slab free path mishandled large allocations. +- Fix: actual unmapped page accounting; slab now tags slab pages via `PG_SLAB` and frees compound allocations with correct order. + +8. 
Page fault handling not integrated into exception path +- Risk: all exceptions halted system before VMM fault resolution could run. +- Fix: IDT exception path now forwards page faults to `page_fault_handler()` and returns on successful resolution. + +## Remaining High-Value Work +- Complete IPC `call/reply/reply_recv` semantics and capability transfer enforcement. +- Replace simulated server behavior with end-to-end live IPC-backed operations across mem/proc/vfs/blk. +- Add deeper runtime stress/fault-injection for process and memory churn paths. diff --git a/docs/CODEX_WORKFLOW.md b/docs/CODEX_WORKFLOW.md new file mode 100644 index 0000000..6ae62b5 --- /dev/null +++ b/docs/CODEX_WORKFLOW.md @@ -0,0 +1,76 @@ +# Codex Development Workflow + +This document describes a conservative, repeatable workflow for continuing Ocean development with Codex. + +**Workflow** +1. Intake +2. Targeted inspection +3. Plan +4. Implement +5. Verify +6. Summarize + +**Intake** +- Restate the goal and scope. +- Identify constraints and success criteria. +- Confirm whether to run `make` or QEMU runs. + +**Targeted Inspection** +- Locate the relevant subsystem first in `kernel/`, then `servers/`, `lib/`, `drivers/`, `fs/`, and `include/`. +- Read headers and call sites before proposing changes. + +**Plan** +- Propose a minimal set of files and changes. +- Call out risks and missing pieces. +- Explicitly note when behavior is simulated or stubbed. + +**Implement** +- Prefer small diffs and incremental commits. +- Preserve existing code style and conventions. +- Avoid cross-cutting refactors unless requested. + +**Verify** +- Default to no build or QEMU runs unless asked. +- Run `make` when explicitly requested or when the change is large and the user approves. + +**Summarize** +- Describe what changed and why. +- Point to key files. +- Note any follow-up risks or gaps. 
+ +**Validation Matrix** + +| Change Type | Default Action | When to Run `make` | When to Run QEMU | +| --- | --- | --- | --- | +| Documentation-only | No build | Only on request | Only on request | +| Kernel changes | Analyze first | With explicit approval | Only on request | +| Userspace server changes | Analyze first | With explicit approval | Only on request | +| Boot configuration changes | Analyze first | With explicit approval | Only on request | + +**Change Checklists** + +New syscall +- Add number and documentation in `kernel/include/ocean/syscall.h` and `lib/libocean/include/ocean/syscall.h`. +- Implement handler in `kernel/syscall/dispatch.c`. +- Add user pointer validation if user memory is touched. +- Update any userspace wrappers in `lib/libocean`. + +IPC protocol change +- Update `include/ocean/ipc_proto.h`. +- Update server implementations that use the protocol. +- Check kernel IPC fast path or endpoint semantics if changed. + +New server or driver +- Add source under `servers/` or `drivers/` or `fs/`. +- Add build rules in `user.mk`. +- Decide if it should be loaded as a boot module in `limine.conf`. +- Register well-known endpoints if applicable. + +Boot module changes +- Update `limine.conf` to add or remove modules. +- Ensure `Makefile` copies the module into the ISO. + +**Failure Handling** +- If the toolchain is missing, report the exact missing component and reference `tools/setup-toolchain.sh`. +- If QEMU or xorriso is missing, explain the limitation and continue with code changes. +- If a change cannot be verified locally, document the gap and a suggested command for the user to run. diff --git a/docs/STATUS.md b/docs/STATUS.md new file mode 100644 index 0000000..266eb0a --- /dev/null +++ b/docs/STATUS.md @@ -0,0 +1,38 @@ +# Project Status + +Snapshot date: February 15, 2026 + +**Snapshot** +Ocean is an educational x86_64 microkernel with a working boot path, basic kernel subsystems, and a small userspace. 
The kernel boots via Limine into a higher-half layout, initializes CPU, memory, scheduler, IPC, and syscalls, then starts init and the shell from boot modules. This snapshot includes significant kernel safety hardening: centralized user-pointer validation, improved endpoint lifetime management, functional channel wakeups, and deterministic QEMU smoke/stress tooling. + +**What Works** +- Boot and arch: Limine boot, higher-half kernel, early serial console, GDT/TSS, IDT/ISR, PIT timer, SYSCALL entry, PIC remap. +- Memory: PMM with bitmap and buddy allocator; VMM with VMAs and paging; kernel heap via slab. +- Scheduler: O(1) priority queues, preemptive tick, single-CPU only with per-CPU scaffolding. +- Processes: basic process and thread structs, fork/exec/wait path, kernel threads. +- IPC: endpoints and synchronous send/recv with fast path. +- Syscall safety: user buffer/string access now goes through kernel `uaccess` helpers. +- Process lifecycle: waited children are reaped with resource cleanup. +- Validation tooling: `make smoke`, `make stress`, and CI smoke workflow. +- Syscalls: minimal set; read and write backed by serial I/O. +- Userspace: minimal libc, init server, shell, and small utilities. + +**What Is Stubbed or Simulated** +- IPC call/reply semantics, capability transfer, and cspace integration. +- Process lifecycle beyond reaping (signals, multithreaded exit edge cases). +- Memory server, process server, VFS server, block server, and drivers are simulated and do not yet perform real kernel-mediated operations. +- Filesystem drivers and block drivers are not wired into live IPC or VFS routing. +- Boot modules load only init, shell, and a few utilities in `limine.conf`. + +**Kernel-first Improvements** +- Complete IPC reply/call semantics, including reply endpoints and tracking caller context. +- Complete capability transfer and cspace enforcement for endpoints and other objects. 
+- Finish process lifecycle behavior beyond wait/reap (signals, multithread edge cases). +- Continue memory correctness work: page refcounting and COW teardown. +- Harden scheduler edge cases and build toward real SMP enablement. + +**Secondary Improvements** +- Wire init to actually spawn services and register well-known endpoints. +- Implement real IPC request/response loops in mem, proc, vfs, and blk servers. +- Integrate filesystem drivers with VFS and block server. +- Add developer tooling: repeatable QEMU run configs, compile_commands generation in CI, and basic smoke tests. diff --git a/scripts/qemu_smoke.sh b/scripts/qemu_smoke.sh new file mode 100755 index 0000000..d44cd38 --- /dev/null +++ b/scripts/qemu_smoke.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +set -euo pipefail + +LOG_FILE="${1:-build/qemu-smoke.log}" +TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-45}" + +mkdir -p "$(dirname "$LOG_FILE")" +: >"$LOG_FILE" + +QEMU_CMD=( + qemu-system-x86_64 + -cdrom ocean.iso + -serial "file:$LOG_FILE" + -display none + -m 256M + -smp 2 + -no-reboot + -no-shutdown +) + +"${QEMU_CMD[@]}" & +QEMU_PID=$! + +cleanup() { + if kill -0 "$QEMU_PID" 2>/dev/null; then + kill "$QEMU_PID" 2>/dev/null || true + wait "$QEMU_PID" 2>/dev/null || true + fi +} +trap cleanup EXIT + +deadline=$((SECONDS + TIMEOUT_SECONDS)) +while (( SECONDS < deadline )); do + if grep -q "Kernel initialization complete" "$LOG_FILE" && + grep -q "Init started with PID" "$LOG_FILE"; then + break + fi + sleep 1 +done + +if ! grep -q "Kernel initialization complete" "$LOG_FILE"; then + echo "Smoke check failed: kernel init signature missing" + exit 1 +fi + +if ! 
grep -q "Init started with PID" "$LOG_FILE"; then + echo "Smoke check failed: init launch signature missing" + exit 1 +fi + +if grep -Eq "Assertion failed|Unhandled page fault|panic|System halted" "$LOG_FILE"; then + echo "Smoke check failed: fatal signature found in serial log" + grep -nE "Assertion failed|Unhandled page fault|panic|System halted" "$LOG_FILE" || true + exit 1 +fi + +echo "Smoke check passed: $LOG_FILE" diff --git a/scripts/qemu_stress.sh b/scripts/qemu_stress.sh new file mode 100755 index 0000000..8a35fc6 --- /dev/null +++ b/scripts/qemu_stress.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail + +ITERATIONS="${ITERATIONS:-5}" +BASE_LOG_DIR="${1:-build/stress}" +TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-35}" + +mkdir -p "$BASE_LOG_DIR" + +for i in $(seq 1 "$ITERATIONS"); do + log_file="$BASE_LOG_DIR/run-${i}.log" + echo "[stress] iteration $i/$ITERATIONS" + TIMEOUT_SECONDS="$TIMEOUT_SECONDS" ./scripts/qemu_smoke.sh "$log_file" +done + +echo "Stress check passed: $ITERATIONS iterations" From 098e0bada6635e86877e06da2938dcbfbe2c8a2f Mon Sep 17 00:00:00 2001 From: Jack Gaffney Date: Sun, 15 Feb 2026 10:04:18 -0500 Subject: [PATCH 4/4] CI: fetch limine and fail fast when ISO prerequisites are missing --- .github/workflows/ci.yml | 3 +++ Makefile | 25 +++++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b1c0a7..5a02769 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,9 @@ jobs: qemu-system-x86 \ xorriso + - name: Fetch Limine boot files + run: make limine + - name: Build run: make all-user -j"$(nproc)" diff --git a/Makefile b/Makefile index 1c11678..dd1429a 100644 --- a/Makefile +++ b/Makefile @@ -123,24 +123,37 @@ $(ISO): $(BUILD_DIR)/$(KERNEL) $(SERVER_BINS) limine.conf @cp $(BUILD_DIR)/cat.elf $(ISO_DIR)/boot/ 2>/dev/null || true @cp $(BUILD_DIR)/ls.elf $(ISO_DIR)/boot/ 2>/dev/null || true @cp limine.conf 
$(ISO_DIR)/boot/ - @# Try to find limine in common locations - @if [ -d "/usr/share/limine" ]; then \ + @# Try to find Limine boot files in common locations + @if [ -f "/usr/share/limine/limine-bios.sys" ] && \ + [ -f "/usr/share/limine/limine-bios-cd.bin" ] && \ + [ -f "/usr/share/limine/limine-uefi-cd.bin" ]; then \ cp /usr/share/limine/limine-bios.sys $(ISO_DIR)/boot/; \ cp /usr/share/limine/limine-bios-cd.bin $(ISO_DIR)/boot/; \ cp /usr/share/limine/limine-uefi-cd.bin $(ISO_DIR)/boot/; \ - elif [ -d "limine" ]; then \ + elif [ -f "limine/limine-bios.sys" ] && \ + [ -f "limine/limine-bios-cd.bin" ] && \ + [ -f "limine/limine-uefi-cd.bin" ]; then \ cp limine/limine-bios.sys $(ISO_DIR)/boot/; \ cp limine/limine-bios-cd.bin $(ISO_DIR)/boot/; \ cp limine/limine-uefi-cd.bin $(ISO_DIR)/boot/; \ else \ - echo "Warning: Limine not found, ISO may not be bootable"; \ + echo "Error: Limine boot files not found."; \ + echo "Run 'make limine' or install Limine files under /usr/share/limine."; \ + exit 1; \ + fi + @if ! command -v xorriso >/dev/null 2>&1; then \ + echo "Error: xorriso not found. Install xorriso to build bootable ISOs."; \ + exit 1; \ fi @xorriso -as mkisofs -b boot/limine-bios-cd.bin \ -no-emul-boot -boot-load-size 4 -boot-info-table \ --efi-boot boot/limine-uefi-cd.bin \ -efi-boot-part --efi-boot-image --protective-msdos-label \ - $(ISO_DIR) -o $@ 2>/dev/null || \ - echo "Note: xorriso not found. Install it for ISO creation." + $(ISO_DIR) -o $@ 2>/dev/null + @if [ ! -f "$@" ]; then \ + echo "Error: ISO creation failed; output file '$@' not found."; \ + exit 1; \ + fi @if [ -f "/usr/bin/limine" ]; then \ limine bios-install $@ 2>/dev/null || true; \ elif [ -f "limine/limine" ]; then \