From 0f80bb80547e8f4ed7614f0e78e2a8c4e955d941 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 15 Apr 2026 21:52:33 +0100 Subject: [PATCH 01/15] fs/tmpfs: fix data loss on reclaim tmpfs was implementing ->writepage() as a "everything went well" so filemap_writepages works correctly. However, it's not correct on reclaim - reclaim thinks it can write it back, it "does" but data is not preserved, thus on the next refault it will read all-zeros back, instead of whatever was there before. Instead of that, add a noop_fsyncdata(). Now tmpfs never uses filemap_writepages(), and thus ->writepage() doesn't need to be implemented. Reclaim takes it as a sign the filesystem can't write anything back, and all is well. Signed-off-by: Pedro Falcato --- kernel/include/onyx/libfs.h | 4 +++- kernel/kernel/fs/libfs.c | 5 +++++ kernel/kernel/fs/tmpfs.cpp | 11 ++--------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/include/onyx/libfs.h b/kernel/include/onyx/libfs.h index f8d554d54..b94a00bf8 100644 --- a/kernel/include/onyx/libfs.h +++ b/kernel/include/onyx/libfs.h @@ -85,12 +85,14 @@ static inline int libfs_no_unlink(const char *name, int flags, struct dentry *di return -EROFS; } +struct writepages_info; + off_t libfs_put_dots(struct dirent *buf, off_t off, struct dentry *dent); void put_dir(const char *name, off_t off, ino_t ino, unsigned int dtype, struct dirent *buf); int default_stat(struct stat *buf, const struct path *path); off_t generic_file_llseek(struct file *filp, off_t offset, int whence); - +int noop_fsyncdata(struct inode *ino, struct writepages_info *wpinfo); __END_CDECLS #endif diff --git a/kernel/kernel/fs/libfs.c b/kernel/kernel/fs/libfs.c index 18b8fef0f..ae16344b7 100644 --- a/kernel/kernel/fs/libfs.c +++ b/kernel/kernel/fs/libfs.c @@ -47,3 +47,8 @@ void put_dir(const char *name, off_t off, ino_t ino, unsigned int dtype, struct buf->d_type = dtype; buf->d_reclen = sizeof(struct dirent) - (256 - (len + 1)); } + +int 
noop_fsyncdata(struct inode *ino, struct writepages_info *wpinfo) +{ + return 0; +} diff --git a/kernel/kernel/fs/tmpfs.cpp b/kernel/kernel/fs/tmpfs.cpp index 77e0a8159..358e4ce7f 100644 --- a/kernel/kernel/fs/tmpfs.cpp +++ b/kernel/kernel/fs/tmpfs.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -184,13 +185,6 @@ ssize_t tmpfs_readpage(struct page *page, size_t offset, struct inode *ino) return PAGE_SIZE; } -ssize_t tmpfs_writepage(struct vm_object *obj, struct page *page, size_t offset) REQUIRES(page) - RELEASE(page) -{ - unlock_page(page); - return PAGE_SIZE; -} - int tmpfs_open(struct dentry *dir, const char *name, struct dentry *dentry) { /* This a no-op, since names are either cached or non-existent in our tmpfs */ @@ -291,7 +285,7 @@ const struct file_ops tmpfs_fops = { .fallocate = nullptr, .read_iter = filemap_read_iter, .write_iter = filemap_write_iter, - .fsyncdata = filemap_writepages, + .fsyncdata = noop_fsyncdata, }; const struct inode_operations tmpfs_ino_ops = { @@ -317,7 +311,6 @@ static void tmpfs_free_page(struct vm_object *vmo, struct page *page) const static vm_object_ops tmpfs_vmops = { .free_page = tmpfs_free_page, - .writepage = tmpfs_writepage, .readpage = tmpfs_readpage, .prepare_write = tmpfs_prepare_write, }; From e88de38f59ed7e30ac95c33ffc2fdf6532a3ff5b Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Thu, 16 Apr 2026 17:13:58 +0100 Subject: [PATCH 02/15] fs/ext2: unregister the shrinker when mount fails If the mount fails, it is imperative to unregister the shrinker. Otherwise it will remain registered and cause UAFs later on. 
Signed-off-by: Pedro Falcato --- kernel/kernel/fs/ext2/ext2.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/kernel/fs/ext2/ext2.cpp b/kernel/kernel/fs/ext2/ext2.cpp index 25cb1ec1c..7fad8bf16 100644 --- a/kernel/kernel/fs/ext2/ext2.cpp +++ b/kernel/kernel/fs/ext2/ext2.cpp @@ -899,6 +899,7 @@ struct superblock *ext2_mount_partition(struct vfs_mount_info *info) error: if (b) block_buf_put(b); + shrinker_unregister(&sb->s_shrinker); delete sb; return (struct superblock *) ERR_PTR(err); From 9362875d50ad8417b1c95e7bdef13d1205652309 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Thu, 16 Apr 2026 17:21:51 +0100 Subject: [PATCH 03/15] tty: chunk writes Instead of allocating everything in one go (uses a lot of memory, more likely to fail), chunk the writes by PAGE_SIZE at a time. Signed-off-by: Pedro Falcato --- kernel/kernel/tty/tty.cpp | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/kernel/kernel/tty/tty.cpp b/kernel/kernel/tty/tty.cpp index 969a7ecaa..cbdc045da 100644 --- a/kernel/kernel/tty/tty.cpp +++ b/kernel/kernel/tty/tty.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 - 2025 Pedro Falcato + * Copyright (c) 2016 - 2026 Pedro Falcato * This file is part of Onyx, and is released under the terms of the GPLv2 License * check LICENSE at the root directory for more information * @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -302,20 +303,38 @@ void tty_write_string_kernel(const char *data) size_t ttydevfs_write(size_t offset, size_t len, void *ubuffer, struct file *f) { struct tty *tty = (struct tty *) f->private_data; + ssize_t written = 0, err, to_write; - char *buffer = (char *) malloc(len); + char *buffer = (char *) kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buffer) return (size_t) -ENOMEM; - if (copy_from_user(buffer, ubuffer, len) < 0) + while (len > 0) { - free(buffer); - return -EFAULT; + to_write = min(PAGE_SIZE, len); + if (copy_from_user(buffer, (char *) ubuffer 
+ written, to_write) < 0) + { + written = written ?: -EFAULT; + break; + } + + err = tty_write(buffer, to_write, tty); + if (err < 0) + { + written = written ?: err; + break; + } + + /* zero? */ + if (WARN_ON(err == 0)) + break; + + written += err; + len -= err; } - len = tty_write(buffer, len, tty); free(buffer); - return len; + return written; } size_t strnewlinelen(const char *str, unsigned int _len) From 33ff563bda389b0f889f49b3042a6f4a8587050d Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Thu, 16 Apr 2026 17:23:05 +0100 Subject: [PATCH 04/15] mm/mremap: remove WARN_ON_ONCE Confusing and annoying to the user. Signed-off-by: Pedro Falcato --- kernel/kernel/mm/vm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/kernel/mm/vm.c b/kernel/kernel/mm/vm.c index 266d5da45..5e15e8186 100644 --- a/kernel/kernel/mm/vm.c +++ b/kernel/kernel/mm/vm.c @@ -2360,7 +2360,6 @@ static bool limits_are_contained(struct vm_area_struct *reg, unsigned long start void *sys_mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) { // TODO: This is broken. - WARN_ON_ONCE(1); return (void *) -ENOSYS; /* Check http://man7.org/linux/man-pages/man2/mremap.2.html for documentation */ bool may_move = flags & MREMAP_MAYMOVE; From dfca3579b163ed4c5b4252569818ebf5b2b0a0c0 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Thu, 16 Apr 2026 18:32:40 +0100 Subject: [PATCH 05/15] hung_task: ignore tasks that haven't switched yet Signed-off-by: Pedro Falcato --- kernel/kernel/hung_task.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/kernel/hung_task.c b/kernel/kernel/hung_task.c index b0a8c3915..25f4f7789 100644 --- a/kernel/kernel/hung_task.c +++ b/kernel/kernel/hung_task.c @@ -31,6 +31,9 @@ static void check_for_hung_tasks(void) rcu_read_lock(); list_for_each_entry_rcu (task, &tasklist, tasklist_node) { + /* Hasn't switched yet, that's fine. */ + if (task->last_switch_time == 0) + continue; /* Not its time, yet. 
*/ if (task->last_switch_time + TIMEOUT > now) continue; From de8e6c677467e28b0c25da36ae2c6c921775c8bb Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Thu, 16 Apr 2026 21:38:05 +0100 Subject: [PATCH 06/15] fs: add fadvise64() Add fadvise64(), backend to fadvise() in userspace and posix_fadvise() as well. With it, implement POSIX_FADV_WILLNEED. Signed-off-by: Pedro Falcato --- kernel/arch/riscv64/syscall_table.json | 25 ++++++++++++++ kernel/arch/x86_64/syscall_table.json | 25 ++++++++++++++ kernel/include/onyx/readahead.h | 4 ++- kernel/kernel/fs/Makefile | 2 +- kernel/kernel/fs/fadvise.c | 45 ++++++++++++++++++++++++++ kernel/kernel/fs/readahead.c | 24 +++++++++++++- 6 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 kernel/kernel/fs/fadvise.c diff --git a/kernel/arch/riscv64/syscall_table.json b/kernel/arch/riscv64/syscall_table.json index ec0bcafad..9e955b8a2 100644 --- a/kernel/arch/riscv64/syscall_table.json +++ b/kernel/arch/riscv64/syscall_table.json @@ -3374,5 +3374,30 @@ ], "return_type": "int", "abi": "c" + }, + { + "name": "fadvise64", + "nr": 221, + "nr_args": 4, + "args": [ + [ + "int", + "fd" + ], + [ + "off_t", + "offset" + ], + [ + "size_t", + "len" + ], + [ + "int", + "advice" + ] + ], + "return_type": "int", + "abi": "c" } ] diff --git a/kernel/arch/x86_64/syscall_table.json b/kernel/arch/x86_64/syscall_table.json index 2962a32e4..8112f5c5d 100644 --- a/kernel/arch/x86_64/syscall_table.json +++ b/kernel/arch/x86_64/syscall_table.json @@ -3440,5 +3440,30 @@ ], "return_type": "int", "abi": "c" + }, + { + "name": "fadvise64", + "nr": 221, + "nr_args": 4, + "args": [ + [ + "int", + "fd" + ], + [ + "off_t", + "offset" + ], + [ + "size_t", + "len" + ], + [ + "int", + "advice" + ] + ], + "return_type": "int", + "abi": "c" } ] diff --git a/kernel/include/onyx/readahead.h b/kernel/include/onyx/readahead.h index cf3fc8ad0..f97647d04 100644 --- a/kernel/include/onyx/readahead.h +++ b/kernel/include/onyx/readahead.h @@ -1,5 +1,5 @@ /* - 
* Copyright (c) 2024 Pedro Falcato + * Copyright (c) 2024 - 2026 Pedro Falcato * This file is part of Onyx, and is released under the terms of the GPLv2 License * check LICENSE at the root directory for more information * @@ -9,6 +9,7 @@ #define _ONYX_READAHEAD_H #include +#include __BEGIN_CDECLS @@ -17,6 +18,7 @@ int filemap_do_readahead_async(struct inode *inode, struct readahead_state *ra_s int filemap_do_readahead_sync(struct inode *inode, struct readahead_state *ra_state, unsigned long pgoff); +void do_force_readahead(struct inode *inode, off_t offset, size_t len); __END_CDECLS #endif diff --git a/kernel/kernel/fs/Makefile b/kernel/kernel/fs/Makefile index 6201846bb..e40ca83e9 100644 --- a/kernel/kernel/fs/Makefile +++ b/kernel/kernel/fs/Makefile @@ -1,6 +1,6 @@ fs-y:= anon_inode.o dentry.o dev.o file.o null.o partition.o pipe.o poll.o pseudo.o \ superblock.o sysfs.o tmpfs.o vfs.o zero.o buffer.o inode.o namei.o filemap.o writeback.o readahead.o \ - flock.o mount.o d_path.o libfs.o seq_file.o coredump.o eventfd.o xattr.o + flock.o mount.o d_path.o libfs.o seq_file.o coredump.o eventfd.o xattr.o fadvise.o include kernel/fs/ext2/Makefile include kernel/fs/proc/Makefile diff --git a/kernel/kernel/fs/fadvise.c b/kernel/kernel/fs/fadvise.c new file mode 100644 index 000000000..aed876dab --- /dev/null +++ b/kernel/kernel/fs/fadvise.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2026 Pedro Falcato + * This file is part of Onyx, and is released under the terms of the GPLv2 License + * check LICENSE at the root directory for more information + * + * SPDX-License-Identifier: GPL-2.0-only + */ +#include + +#include +#include +#include + +int sys_fadvise64(int fd, off_t offset, size_t len, int advice) +{ + struct file *file; + int err; + + file = get_file_description(fd); + if (!file) + return -EBADF; + + err = -ESPIPE; + if (S_ISFIFO(file->f_ino->i_mode)) + goto out; + + err = -EINVAL; + if (!file->f_mapping) + goto out; + + err = 0; + switch (advice) + { + case 
POSIX_FADV_WILLNEED: + do_force_readahead(file->f_mapping->ino, offset, len); + break; + default: + err = -EINVAL; + break; + } + +out: + fd_put(file); + return err; +} diff --git a/kernel/kernel/fs/readahead.c b/kernel/kernel/fs/readahead.c index 3a62b16db..ab0c53d98 100644 --- a/kernel/kernel/fs/readahead.c +++ b/kernel/kernel/fs/readahead.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Pedro Falcato + * Copyright (c) 2024 - 2026 Pedro Falcato * This file is part of Onyx, and is released under the terms of the GPLv2 License * check LICENSE at the root directory for more information * @@ -177,3 +177,25 @@ int filemap_do_readahead_async(struct inode *inode, struct readahead_state *ra_s WRITE_ONCE(ra_state->ra_window, window * 2); return filemap_do_readahead(inode, ra_state, READ_ONCE(ra_state->ra_start) + window * 2); } + +void do_force_readahead(struct inode *ino, off_t offset, size_t len) +{ + size_t chunk; + + /* Do readahead in 4MB chunks - it's not desirable to do everything at once, or too much memory + * may be pinned at the same time. */ + while (len) + { + chunk = min(len, 0x400000ul); + unsigned long pgoff = offset >> PAGE_SHIFT; + struct readahead_state ra = { + .ra_window = ((offset + chunk) >> PAGE_SHIFT) - pgoff, + }; + + /* If RA or the filesystem says stop, we stop */ + if (filemap_do_readahead(ino, &ra, pgoff) < 0) + break; + len -= chunk; + offset += chunk; + } +} From 276cfcd5a92403cba81093d86178fce05547ba58 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Thu, 16 Apr 2026 21:42:27 +0100 Subject: [PATCH 07/15] bootstrap-init: add support for invoking fsck for the root fs Add support for invoking fsck for the root filesystem, if present in the initrd. 
Signed-off-by: Pedro Falcato --- usystem/core/bootstrap-init/main.c | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/usystem/core/bootstrap-init/main.c b/usystem/core/bootstrap-init/main.c index 09eff69cb..73333e184 100644 --- a/usystem/core/bootstrap-init/main.c +++ b/usystem/core/bootstrap-init/main.c @@ -16,6 +16,7 @@ #include #include #include +#include #include int mount_autodetect(const char *dev, const char *mpoint) @@ -122,6 +123,69 @@ static void drop_to_rescue_sh() perror("exec"); } +static int do_fsck(const char *bdev) +{ + /* No support for anything else right now */ + const char *filesystems[] = {"ext2", "ext3", "ext4"}; + char buf[64]; + int exec = 0; + int wstatus; + pid_t pid; + + for (unsigned int i = 0; i < sizeof(filesystems) / sizeof(filesystems[0]); i++) + { + sprintf(buf, "/sbin/fsck.%s", filesystems[i]); + if (access(buf, X_OK) == 0) + { + /* Ok, it's here, let's exec */ + pid = fork(); + if (pid == 0) + { + /* -p = no questions */ + if (execl(buf, buf, "-p", bdev, NULL) < 0) + { + perror("exec"); + exit(1); + } + } + else if (pid < 0) + { + perror("fork"); + return -1; + } + + exec = 1; + break; + } + } + + if (!exec) + { + printf("fsck not found, continuing\n"); + return 0; + } + + if (wait(&wstatus) < 0) + { + perror("wait"); + return 1; + } + + if (!WIFEXITED(wstatus)) + { + if (WIFSIGNALED(wstatus)) + printf("%s exited with signal %d\n", buf, WTERMSIG(wstatus)); + return -1; + } + + /* Now we get to interpret the exit code. 
We can tolerate 0 or 1 - anything else should drop to + * a rescue shell (0 = clean, 1 = corrected) */ + if (WEXITSTATUS(wstatus) == 0 || WEXITSTATUS(wstatus) == 1) + return 0; + printf("%s exited with error code %d\n", buf, WEXITSTATUS(wstatus)); + return -1; +} + int main(int argc, char **argv) { // Ok so our job is to load initial modules and mount root @@ -205,6 +269,14 @@ int main(int argc, char **argv) if (option_verbose) fprintf(stderr, "bootstrap-init: Mounting root filesystem %s...\n", root_blockdev); + /* fsck the root block device */ + st = do_fsck(root_blockdev); + if (st) + { + drop_to_rescue_sh(); + return 1; + } + st = mount_autodetect(root_blockdev, "/"); free((void *) root_blockdev); From 3e69c5c23ea6d438cc14ecece6802f542d3e821b Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Sat, 18 Apr 2026 00:56:27 +0100 Subject: [PATCH 08/15] fs: implement nosuid and nodev Implement nosuid (which stops setuid/setgid executables from doing the actual setuid bit) and nodev (which stops device files from working on a given mount). 
Signed-off-by: Pedro Falcato --- kernel/include/onyx/mount.h | 4 ++++ kernel/kernel/fs/inode.cpp | 2 ++ kernel/kernel/fs/mount.c | 15 +++++++++++---- kernel/kernel/fs/vfs.cpp | 2 ++ 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/kernel/include/onyx/mount.h b/kernel/include/onyx/mount.h index ca933f394..73605ee91 100644 --- a/kernel/include/onyx/mount.h +++ b/kernel/include/onyx/mount.h @@ -8,6 +8,8 @@ #ifndef _ONYX_MOUNT_H #define _ONYX_MOUNT_H +#include + #include #include #include @@ -16,6 +18,8 @@ struct dentry; struct superblock; #define MNT_READONLY (1U << 0) +#define MNT_NOSUID MS_NOSUID +#define MNT_NODEV MS_NODEV #define MNT_STRICTATIME MS_STRICTATIME #define MNT_NOATIME MS_NOATIME #define MNT_NODIRATIME MS_NODIRATIME diff --git a/kernel/kernel/fs/inode.cpp b/kernel/kernel/fs/inode.cpp index 6e8871f85..092ed3432 100644 --- a/kernel/kernel/fs/inode.cpp +++ b/kernel/kernel/fs/inode.cpp @@ -76,6 +76,8 @@ static int dev_do_open(struct file *filp) struct inode *ino = filp->f_ino; gendev *dev; + if (filp->f_path.mount->mnt_flags & MNT_NODEV) + return -EACCES; dev = S_ISBLK(ino->i_mode) ? (gendev *) dev_find_block(ino->i_rdev) : (gendev *) dev_find_chr(ino->i_rdev); if (!dev) diff --git a/kernel/kernel/fs/mount.c b/kernel/kernel/fs/mount.c index dd5e133e9..da17947c1 100644 --- a/kernel/kernel/fs/mount.c +++ b/kernel/kernel/fs/mount.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 - 2025 Pedro Falcato + * Copyright (c) 2024 - 2026 Pedro Falcato * This file is part of Onyx, and is released under the terms of the GPLv2 License * check LICENSE at the root directory for more information * @@ -441,11 +441,13 @@ static struct mount *do_remount(const char *target, unsigned int sb_flags, unsig return err ? 
ERR_PTR(err) : mnt; } -#define VALID_MOUNT_FLAGS \ - (MS_RDONLY | MS_SILENT | MS_RELATIME | MS_REMOUNT | MS_BIND | MS_STRICTATIME | MS_NOATIME) +#define VALID_MOUNT_FLAGS \ + (MS_RDONLY | MS_NODEV | MS_NOSUID | MS_SILENT | MS_RELATIME | MS_REMOUNT | MS_BIND | \ + MS_STRICTATIME | MS_NOATIME) /* These flags have the same bits internally as in the argument */ -#define SAME_FLAGS_MASK (MNT_STRICTATIME | MNT_NOATIME | MNT_NODIRATIME | MNT_READONLY) +#define SAME_FLAGS_MASK \ + (MNT_STRICTATIME | MNT_NOATIME | MNT_NODIRATIME | MNT_READONLY | MNT_NODEV | MNT_NOSUID) static unsigned long translate_mount_flags(unsigned long flags) { @@ -647,6 +649,8 @@ static void mounts_print_flags(struct seq_file *m, struct mount *mnt) { unsigned int flags = READ_ONCE(mnt->mnt_flags); unsigned int i; + + // clang-format off static const struct { unsigned int flag; @@ -655,7 +659,10 @@ static void mounts_print_flags(struct seq_file *m, struct mount *mnt) {MNT_NOATIME, "noatime"}, {MNT_STRICTATIME, "strictatime"}, {MNT_NODIRATIME, "nodiratime"}, + {MNT_NODEV, "nodev"}, + {MNT_NOSUID, "nosuid"}, }; + // clang-format on seq_puts(m, mnt_rdonly(mnt) ? "ro" : "rw"); diff --git a/kernel/kernel/fs/vfs.cpp b/kernel/kernel/fs/vfs.cpp index a61ea3dc3..0ca3826c7 100644 --- a/kernel/kernel/fs/vfs.cpp +++ b/kernel/kernel/fs/vfs.cpp @@ -803,6 +803,8 @@ bool apply_sugid_permissions(file *f) auto ino = f->f_ino; bool changed = false; + if (f->f_path.mount->mnt_flags & MNT_NOSUID) + return false; if (!(ino->i_mode & (S_ISGID | S_ISUID))) return false; From ef92bc1682f2ccb2f7d9920995f95c63ce77b44e Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Sat, 18 Apr 2026 00:57:43 +0100 Subject: [PATCH 09/15] fs/mount: tolerate NULL filesystem type It seems that Linux mount(2) can tolerate some parameters as NULL. A good example is the filesystem type, which is ignored if not used. 
Signed-off-by: Pedro Falcato --- kernel/kernel/fs/mount.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/kernel/kernel/fs/mount.c b/kernel/kernel/fs/mount.c index da17947c1..8123e2000 100644 --- a/kernel/kernel/fs/mount.c +++ b/kernel/kernel/fs/mount.c @@ -459,15 +459,14 @@ static unsigned long translate_mount_flags(unsigned long flags) int do_mount(const char *source, const char *target, const char *fstype, unsigned long flags, const void *data) { - struct fs_mount *fs; + struct fs_mount *fs = NULL; struct mount *mnt; unsigned long mnt_flags; unsigned int sb_flags; /* Find the fstype's handler */ - fs = fs_mount_get(fstype); - if (!fs) - return -ENODEV; + if (fstype) + fs = fs_mount_get(fstype); if (flags & ~VALID_MOUNT_FLAGS) return -EINVAL; @@ -478,7 +477,11 @@ int do_mount(const char *source, const char *target, const char *fstype, unsigne if ((flags & (MS_REMOUNT | MS_BIND)) == MS_REMOUNT) mnt = do_remount(target, sb_flags, mnt_flags, data); else + { + if (!fs) + return -ENODEV; mnt = do_mount_internal(source, target, fs, mnt_flags, sb_flags, data); + } if (IS_ERR(mnt)) return PTR_ERR(mnt); return 0; @@ -509,11 +512,14 @@ int sys_mount(const char *usource, const char *utarget, const char *ufilesystemt goto out; } - filesystemtype = strcpy_from_user(ufilesystemtype); - if (!filesystemtype) + if (ufilesystemtype != NULL) { - ret = -errno; - goto out; + filesystemtype = strcpy_from_user(ufilesystemtype); + if (!filesystemtype) + { + ret = -errno; + goto out; + } } ret = do_mount(source, target, filesystemtype, mountflags, data); From 7fb9200b6c00cc547a874f74468a6300b9a230ef Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Sat, 18 Apr 2026 00:59:07 +0100 Subject: [PATCH 10/15] sched: move the rcu quiescent period call earlier in switching logic Ever since the blamed commit, RCU will not trigger easily as staying on a given thread (e.g idle thread) does not call rcu_do_quiesc(). 
This happens because rcu_do_quiesc() was being done in sched_load_finish(). This makes RCU practically useless. Instead of that, move the call earlier in the sched switching. Signed-off-by: Pedro Falcato --- kernel/kernel/sched/scheduler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kernel/sched/scheduler.cpp b/kernel/kernel/sched/scheduler.cpp index cec1a5ec0..b6094cda8 100644 --- a/kernel/kernel/sched/scheduler.cpp +++ b/kernel/kernel/sched/scheduler.cpp @@ -542,8 +542,6 @@ NO_ASAN void sched_load_finish(thread *prev_thread, thread *next_thread) #endif sched_load_thread(prev_thread, next_thread, get_cpu_nr()); - rcu_do_quiesc(); - inc_per_cpu(nr_ctx_switches); if (prev_thread) atomic_and_relaxed(prev_thread->flags, ~THREAD_RUNNING); @@ -629,6 +627,8 @@ extern "C" void *sched_schedule(void *last_stack) do_cputime_accounting(); } + rcu_do_quiesc(); + thread *source_thread = curr_thread; irq_save_and_disable(); From 320a74af8768147f7b319cf4461b871ed2a6cf49 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Sat, 18 Apr 2026 01:02:28 +0100 Subject: [PATCH 11/15] fs/pipe: fix poll on unconnected socket always returning POLLHUP The proper behavior for a poll() is to signal a pipe has been broken. This is not true if it never actually got connected in the first place. As such, keep a writer sequence count in file->private_data. This makes it so poll() only returns POLLHUP if the current pipe has seen a writer that is no longer there. 
Signed-off-by: Pedro Falcato --- kernel/kernel/fs/pipe.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/kernel/fs/pipe.cpp b/kernel/kernel/fs/pipe.cpp index cee3f4c43..75a214fe7 100644 --- a/kernel/kernel/fs/pipe.cpp +++ b/kernel/kernel/fs/pipe.cpp @@ -94,6 +94,7 @@ struct pipe : public refcountable struct list_head pipe_buffers; size_t curr_len{0}; mutex pipe_lock{1}; + unsigned long wr_seq; wait_queue write_queue; wait_queue read_queue; @@ -180,6 +181,7 @@ pipe::pipe() : refcountable(1) init_wait_queue_head(&read_queue); INIT_LIST_HEAD(&pipe_buffers); mutex_init(&pipe_lock); + wr_seq = 0; } pipe::~pipe() @@ -367,6 +369,7 @@ void pipe::close_write_end() { wake_all(&read_queue); } + wr_seq++; } void pipe::close_read_end() @@ -394,7 +397,7 @@ short pipe::poll(struct file *filp, void *poll_file, short events) revents |= (events & (POLLIN | POLLRDNORM)); } - if (writer_count == 0) + if (writer_count == 0 && (unsigned long) filp->private_data != wr_seq) revents |= POLLHUP; } @@ -927,6 +930,7 @@ int pipe::open_named(struct file *filp) else if ((filp->f_flags & O_RDWRMASK) == O_WRONLY) { writer_count++; + wr_seq++; wake_all(&read_queue); COMPILER_BARRIER(); // Use a lambda to go around the multiple wait_for_event problem @@ -941,6 +945,7 @@ int pipe::open_named(struct file *filp) // POSIX leaves this undefined, we peer with ourselves. writer_count++; reader_count++; + wr_seq++; st = 0; } else @@ -952,11 +957,15 @@ int pipe::open_named(struct file *filp) { // Remove ourselves from the count if we got a signal if (filp->f_flags & O_WRONLY) + { writer_count--; + wr_seq++; + } else reader_count--; } + filp->private_data = (void *) wr_seq; return st; } From 473f4acd9bd91938c60e55b1dc2fd181844be961 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Sat, 18 Apr 2026 01:05:50 +0100 Subject: [PATCH 12/15] fs/mount: warn once if bad mount flags are passed This is useful and helps diagnose unimplemented mount flags. 
Signed-off-by: Pedro Falcato --- kernel/kernel/fs/mount.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/kernel/fs/mount.c b/kernel/kernel/fs/mount.c index 8123e2000..c40823e80 100644 --- a/kernel/kernel/fs/mount.c +++ b/kernel/kernel/fs/mount.c @@ -469,7 +469,11 @@ int do_mount(const char *source, const char *target, const char *fstype, unsigne fs = fs_mount_get(fstype); if (flags & ~VALID_MOUNT_FLAGS) + { + pr_warn_once("mount: unknown flags %lx mounting %s (%s)\n", flags & ~VALID_MOUNT_FLAGS, + source, fstype); return -EINVAL; + } mnt_flags = translate_mount_flags(flags); sb_flags = flags & (SB_RDONLY | SB_SILENT); From c5c3f5b0f357c3f72d2ab267295d9714ae4832f4 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Fri, 1 May 2026 01:38:26 +0100 Subject: [PATCH 13/15] net/netlink: remove debug printks Signed-off-by: Pedro Falcato --- kernel/kernel/net/rtnetlink.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/kernel/net/rtnetlink.c b/kernel/kernel/net/rtnetlink.c index db274dd61..50078ab39 100644 --- a/kernel/kernel/net/rtnetlink.c +++ b/kernel/kernel/net/rtnetlink.c @@ -39,7 +39,6 @@ static int do_handle_rtnl(struct netlink_sock *nlsk, struct packetbuf *pbf, stru int type, err; type = nlh->nlmsg_type; - pr_warn("rtnl send %u flags %x\n", type, nlh->nlmsg_flags); if (type > RTM_MAX) return -EOPNOTSUPP; @@ -62,14 +61,11 @@ static int do_handle_rtnl(struct netlink_sock *nlsk, struct packetbuf *pbf, stru if (err < 0) { pbf_free(new_pbf); - pr_warn("err %d\n", err); return err; } - pr_warn("pbf len %u\n", pbf_length(new_pbf)); list_add_tail(&new_pbf->list_node, &nlsk->buf_list); wait_queue_wake_all(&nlsk->wq); - pr_warn("done yay\n"); return err; } From 183e02973d7acb66f4ad8fd6a21c67814db7f5da Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Fri, 1 May 2026 01:39:11 +0100 Subject: [PATCH 14/15] fs/proc: implement sigign and sigcatch for stat Properly collect sigign and sigcatch masks for /proc/pid/stat. 
Signed-off-by: Pedro Falcato --- kernel/kernel/fs/proc/pid.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/kernel/kernel/fs/proc/pid.c b/kernel/kernel/fs/proc/pid.c index f35482ca6..d229e4407 100644 --- a/kernel/kernel/fs/proc/pid.c +++ b/kernel/kernel/fs/proc/pid.c @@ -100,6 +100,25 @@ static char task_state(struct process *task) return '?'; } +static void task_sigign_sigcatch(struct process *task, sigset_t *ign, sigset_t *catch) +{ + void (*handler)(int); + int signal; + + spin_lock(&task->sighand->signal_lock); + + for (signal = 1; signal < _NSIG; signal++) + { + handler = task->sighand->sigtable[signal].sa_handler; + if (handler == SIG_IGN) + sigaddset(ign, signal); + else if (handler != SIG_DFL) + sigaddset(catch, signal); + } + + spin_unlock(&task->sighand->signal_lock); +} + static int proc_pid_stat_show(struct seq_file *m, void *v) { unsigned long minflt, majflt, cminflt, cmajflt; @@ -113,7 +132,8 @@ static int proc_pid_stat_show(struct seq_file *m, void *v) unsigned long vsize, rss, rsslim; unsigned long startcode, endcode, startstack; unsigned long kstkesp, kstkeip; - unsigned int pending, blocked, ignored, catched; + unsigned int pending, blocked; + sigset_t ignored, catched; unsigned long wchan; unsigned long startdata, enddata; unsigned long start_brk; @@ -220,9 +240,9 @@ static int proc_pid_stat_show(struct seq_file *m, void *v) pending = READ_ONCE(task->sigqueue.pending.__bits[0]); blocked = READ_ONCE(task->sigmask.__bits[0]); - /* TODO: More todo (counting SIG_IGN and !SIG_DFL...)*/ - ignored = 0; - catched = 0; + sigemptyset(&ignored); + sigemptyset(&catched); + task_sigign_sigcatch(task, &ignored, &catched); /* TODO: 0 0 for priority, nice. 0 0 for itrealvalue, starttime. 0 0 for * nswap, cnswap. 0 0 0 0 0 for (40 - 44 in the manpage). 
*/ @@ -234,9 +254,10 @@ static int proc_pid_stat_show(struct seq_file *m, void *v) pid_nr(task_session(task)), tty_num, tty_pgrp, (unsigned int) task->flags, minflt, cminflt, majflt, cmajflt, utime / NS_PER_MS, stime / NS_PER_MS, cutime / NS_PER_MS, cstime / NS_PER_MS, (unsigned long) READ_ONCE(sig->nr_threads), vsize, rss, rsslim, - startcode, endcode, startstack, kstkesp, kstkeip, pending, blocked, ignored, catched, - wchan, SIGCHLD, task->thr->cpu, startdata, enddata, start_brk, arg_start, arg_end, - env_start, env_end, exit_code); + startcode, endcode, startstack, kstkesp, kstkeip, pending, blocked, + (unsigned int) ignored.__bits[0], (unsigned int) catched.__bits[0], wchan, SIGCHLD, + task->thr->cpu, startdata, enddata, start_brk, arg_start, arg_end, env_start, + env_end, exit_code); rcu_read_unlock(); if (mm) From 6de84a7c8a57aa1612b58d7ce0a58a353336d943 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Fri, 1 May 2026 01:40:06 +0100 Subject: [PATCH 15/15] fs/proc: add version file Add a version file that says "onyx-rolling". Required by some /proc parsing programs. 
Signed-off-by: Pedro Falcato --- kernel/kernel/fs/proc/version.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 kernel/kernel/fs/proc/version.c diff --git a/kernel/kernel/fs/proc/version.c b/kernel/kernel/fs/proc/version.c new file mode 100644 index 000000000..2963ecd1e --- /dev/null +++ b/kernel/kernel/fs/proc/version.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2026 Pedro Falcato + * This file is part of Onyx, and is released under the terms of the GPLv2 License + * check LICENSE at the root directory for more information + * + * SPDX-License-Identifier: GPL-2.0-only + */ +#include +#include + +static int version_show(struct seq_file *m, void *ptr) +{ + seq_printf(m, "onyx-rolling\n"); + return 0; +} + +static int proc_version_open(struct file *filp) +{ + return single_open(filp, version_show, NULL); +} + +static const struct proc_file_ops proc_version_ops = { + .open = proc_version_open, + .read_iter = seq_read_iter, + .release = seq_release, +}; + +static __init void proc_version_init(void) +{ + procfs_add_entry("version", 0444, NULL, &proc_version_ops); +}