
Commit 85ef1ac

[TEMP] KVM: guest_memfd: Update gmem_prepare hook to handle partially-allocated folios
TODO: move to bitmap-based tracking within gmem

Marking the whole folio uptodate as part of kvm_gmem_populate() doesn't work as expected for THP, because kvm_gmem_populate() might only update a subset of the pages, leaving the other pages still needing the gmem_prepare hooks to be called when they are faulted in later.

Eventually this will be tracked by introducing a more granular flag than uptodate that can track whether any particular 4K page has been prepared or not. For now, however, modify kvm_gmem_populate() to only set the uptodate flag if the entire 2M range is initialized, and leave it unset otherwise so the gmem_prepare hooks still run. The gmem_prepare() hooks themselves can already track page states at 4K granularity via the RMP table, so they are fully capable of dealing with partially-initialized folios.

As an optimization, also allow kvm_gmem_prepare() to set the uptodate flag if the whole folio is already in a private/initialized state, to avoid needing to scan the RMP entries for each subpage. Effectively, this makes the uptodate flag an optimization for skipping RMP checks within a fully-prepared folio; in the absence of the uptodate flag, the RMP table is ultimately the authority on the preparation state of each individual subpage within a folio.

Signed-off-by: Michael Roth <[email protected]>
1 parent 4a43efc commit 85ef1ac
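The rule the message describes (uptodate as a fast path, the RMP table as the authority) reduces to a per-subpage check along these lines. A minimal sketch for illustration only: snp_lookup_rmpentry() is the lookup the patch itself uses, but subpage_needs_prepare() is a hypothetical helper name, not something this commit adds.

```c
/*
 * Sketch of the preparation-state rule from the commit message.
 * The folio-wide uptodate flag is only an optimization: when set, every
 * 4K subpage is known to be prepared and the RMP lookup can be skipped.
 * When clear, the RMP table remains the authority for each subpage.
 * subpage_needs_prepare() is a hypothetical name, for illustration only.
 */
static bool subpage_needs_prepare(struct folio *folio, kvm_pfn_t pfn)
{
	bool assigned;
	int rmp_level;

	if (folio_test_uptodate(folio))
		return false;	/* whole folio already prepared */

	/* Fall back to the 4K-granular RMP state for this subpage. */
	if (snp_lookup_rmpentry(pfn, &assigned, &rmp_level))
		return true;	/* be conservative if the lookup fails */

	return !assigned;
}
```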

File tree: 2 files changed, +91 −18

arch/x86/kvm/svm/sev.c
virt/kvm/guest_memfd.c

arch/x86/kvm/svm/sev.c

Lines changed: 50 additions & 0 deletions
@@ -2271,6 +2271,9 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf
 	int npages = (1 << order);
 	gfn_t gfn;
 
+	pr_debug("%s: GFN start 0x%llx PFN start 0x%llx order %d\n",
+		 __func__, gfn_start, pfn, order);
+
 	if (WARN_ON_ONCE(sev_populate_args->type != KVM_SEV_SNP_PAGE_TYPE_ZERO && !src))
 		return -EINVAL;
 
@@ -4775,6 +4778,33 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
 	put_page(pfn_to_page(pfn));
 }
 
+static bool is_pfn_range_private(kvm_pfn_t start, kvm_pfn_t end)
+{
+	kvm_pfn_t pfn = start;
+
+	while (pfn < end) {
+		int ret, rmp_level;
+		bool assigned;
+
+		ret = snp_lookup_rmpentry(pfn, &assigned, &rmp_level);
+		if (ret) {
+			pr_warn_ratelimited("SEV: Failed to retrieve RMP entry: PFN 0x%llx PFN start 0x%llx PFN end 0x%llx RMP level %d error %d\n",
+					    pfn, start, end, rmp_level, ret);
+			return false;
+		}
+
+		if (!assigned) {
+			pr_debug("%s: non-private PFN detected, PFN 0x%llx start 0x%llx end 0x%llx RMP level %d\n",
+				 __func__, pfn, start, end, rmp_level);
+			return false;
+		}
+
+		pfn++;
+	}
+
+	return true;
+}
+
 static bool is_pfn_range_shared(kvm_pfn_t start, kvm_pfn_t end)
 {
 	kvm_pfn_t pfn = start;
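A usage sketch for the new helper (a hypothetical caller, mirroring the logic sev_gmem_prepare() adopts in the hunks below): only mark a folio uptodate once every 4K subpage in its PFN range is private.

```c
/* Hypothetical caller, for illustration; nr is 512 for a 2M folio. */
static void example_mark_if_fully_private(struct folio *folio,
					  kvm_pfn_t pfn_aligned)
{
	kvm_pfn_t nr = 1 << folio_order(folio);

	/* The RMP table is the authority: check every 4K subpage. */
	if (is_pfn_range_private(pfn_aligned, pfn_aligned + nr))
		folio_mark_uptodate(folio);
}
```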
@@ -4830,6 +4860,7 @@ int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
 {
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	kvm_pfn_t pfn_aligned;
+	struct folio *folio;
 	gfn_t gfn_aligned;
 	int level, rc;
 	bool assigned;
@@ -4860,6 +4891,16 @@ int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
 		gfn_aligned = gfn;
 	}
 
+	folio = page_folio(pfn_to_page(pfn_aligned));
+	if (!folio_test_uptodate(folio)) {
+		unsigned long nr_pages = level == PG_LEVEL_4K ? 1 : 512;
+		int i;
+
+		pr_debug("%s: folio not up-to-date, clearing folio pages.\n", __func__);
+		for (i = 0; i < nr_pages; i++)
+			clear_highpage(pfn_to_page(pfn_aligned + i));
+	}
+
 	rc = rmp_make_private(pfn_aligned, gfn_to_gpa(gfn_aligned), level, sev->asid, false);
 	if (rc) {
 		pr_err_ratelimited("SEV: Failed to update RMP entry: GFN %llx PFN %llx level %d error %d\n",
@@ -4870,6 +4911,15 @@ int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
 	pr_debug("%s: updated: gfn %llx pfn %llx pfn_aligned %llx max_order %d level %d\n",
 		 __func__, gfn, pfn, pfn_aligned, max_order, level);
 
+	if (pfn == pfn_aligned && folio_order(folio) == max_order) {
+		folio_mark_uptodate(folio);
+		pr_debug("%s: setting folio up-to-date (full update)\n", __func__);
+	} else if (is_pfn_range_private(pfn_aligned,
+					pfn_aligned + (1 << folio_order(folio)))) {
+		folio_mark_uptodate(folio);
+		pr_debug("%s: setting folio up-to-date (follow-up update)\n", __func__);
+	}
+
 	return 0;
 }
 
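To make the two marking paths above concrete, consider a hypothetical sequence (not taken from the patch): a 2M (order-9, 512-page) folio starting at PFN A. A call with pfn == A and max_order == 9 converts the whole folio with a single rmp_make_private() call, so the "full update" path marks it uptodate immediately. If the folio is instead prepared piecemeal, each call leaves it !uptodate until the is_pfn_range_private(A, A + 512) scan finally finds every subpage assigned; at that point the "follow-up update" path marks it uptodate, and subsequent faults into the folio can skip the per-subpage RMP checks.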
virt/kvm/guest_memfd.c

Lines changed: 41 additions & 18 deletions
@@ -56,14 +56,9 @@ static inline void kvm_gmem_mark_prepared(struct folio *folio)
 static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
 				  gfn_t gfn, struct folio *folio)
 {
-	unsigned long nr_pages, i;
 	pgoff_t index;
 	int r;
 
-	nr_pages = folio_nr_pages(folio);
-	for (i = 0; i < nr_pages; i++)
-		clear_highpage(folio_page(folio, i));
-
 	/*
 	 * Preparing huge folios should always be safe, since it should
 	 * be possible to split them later if needed.
@@ -77,12 +72,16 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
 	 * The order will be passed when creating the guest_memfd, and
 	 * checked when creating memslots.
 	 */
-	WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio)));
+	if (!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio)))
+		pr_debug_ratelimited("%s: GFN %llx not aligned (slot gfn start %llx pgoff %lx)",
+				     __func__, gfn, slot->base_gfn, slot->gmem.pgoff);
 	index = gfn - slot->base_gfn + slot->gmem.pgoff;
 	index = ALIGN_DOWN(index, 1 << folio_order(folio));
 	r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
-	if (!r)
-		kvm_gmem_mark_prepared(folio);
+	if (!r) {
+		pr_debug("%s: marking GFN %llx prepared\n", __func__, gfn);
+		//kvm_gmem_mark_prepared(folio);
+	}
 
 	return r;
 }
@@ -126,19 +125,36 @@ static struct folio *kvm_gmem_get_huge_folio(struct inode *inode, pgoff_t index,
  * Ignore accessed, referenced, and dirty flags. The memory is
  * unevictable and there is no storage to write back to.
  */
-static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
+static struct folio *__kvm_gmem_get_folio(struct inode *inode, pgoff_t index,
+					  bool allow_huge)
 {
 	struct folio *folio = NULL;
 
-	if (gmem_2m_enabled)
+	if (gmem_2m_enabled && allow_huge)
 		folio = kvm_gmem_get_huge_folio(inode, index, PMD_ORDER);
 
 	if (!folio)
 		folio = filemap_grab_folio(inode->i_mapping, index);
 
+	pr_debug("%s: allocate folio with PFN %lx order %d\n",
+		 __func__, folio_pfn(folio), folio_order(folio));
 	return folio;
 }
 
+/*
+ * Returns a locked folio on success. The caller is responsible for
+ * setting the up-to-date flag before the memory is mapped into the guest.
+ * There is no backing storage for the memory, so the folio will remain
+ * up-to-date until it's removed.
+ *
+ * Ignore accessed, referenced, and dirty flags. The memory is
+ * unevictable and there is no storage to write back to.
+ */
+static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
+{
+	return __kvm_gmem_get_folio(inode, index, true);
+}
+
 static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
 				      pgoff_t end)
 {
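The split above keeps kvm_gmem_get_folio() as a thin wrapper so existing callers are unchanged, while new callers can opt out of huge-folio allocation. A hypothetical illustration (example_get_folio() is invented, and the rationale for allow_huge=false on the populate path is inferred from the commit message's partial-initialization concern, not stated in the patch):

```c
/*
 * Hypothetical wrapper illustrating the allow_huge split. The populate
 * path passes allow_huge=false, presumably so a new folio is never
 * allocated at 2M order when only part of it may be initialized.
 */
static struct folio *example_get_folio(struct inode *inode, pgoff_t index,
				       bool for_populate)
{
	/* kvm_gmem_get_folio(inode, index) == __kvm_gmem_get_folio(inode, index, true) */
	return __kvm_gmem_get_folio(inode, index, !for_populate);
}
```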
@@ -592,7 +608,7 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
 static struct folio *
 __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
 		   gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared,
-		   int *max_order)
+		   int *max_order, bool allow_huge)
 {
 	struct kvm_gmem *gmem = file->private_data;
 	pgoff_t index, huge_index;
@@ -610,7 +626,7 @@ __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
 		return ERR_PTR(-EIO);
 	}
 
-	folio = kvm_gmem_get_folio(file_inode(file), index);
+	folio = __kvm_gmem_get_folio(file_inode(file), index, allow_huge);
 	if (IS_ERR(folio))
 		return folio;
 
@@ -622,11 +638,11 @@ __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
 
 	*pfn = folio_file_pfn(folio, index);
 	if (!max_order)
-		goto success;
+		return folio;
 
-	*max_order = compound_order(compound_head(page));
+	*max_order = folio_order(folio);
 	if (!*max_order)
-		goto success;
+		return folio;
 
 	/*
 	 * The folio can be mapped with a hugepage if and only if the folio is
@@ -654,7 +670,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 	if (!file)
 		return -EFAULT;
 
-	folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order);
+	folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order, true);
 	if (IS_ERR(folio)) {
 		r = PTR_ERR(folio);
 		goto out;
@@ -710,12 +726,16 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
 			break;
 		}
 
-		folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order);
+		folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order, false);
 		if (IS_ERR(folio)) {
 			ret = PTR_ERR(folio);
 			break;
 		}
 
+		pr_debug("%s: GFN start 0x%llx PFN start 0x%llx max_order %d folio_order %d uptodate %d\n",
+			 __func__, gfn, pfn, max_order, folio_order(folio),
+			 folio_test_uptodate(folio));
+
 		if (is_prepared) {
 			folio_unlock(folio);
 			folio_put(folio);
@@ -738,8 +758,11 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
 
 		p = src ? src + i * PAGE_SIZE : NULL;
 		ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
-		if (!ret)
+		if (!ret && max_order == folio_order(folio)) {
+			pr_debug("%s: GFN start 0x%llx PFN start 0x%llx max_order %d folio_order %d marking uptodate.\n",
+				 __func__, gfn, pfn, max_order, folio_order(folio));
 			kvm_gmem_mark_prepared(folio);
+		}
 
 put_folio_and_exit:
 		folio_put(folio);
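The commit message's TODO points at bitmap-based tracking within gmem as the eventual replacement for this uptodate heuristic. A minimal sketch of what 4K-granular tracking might look like; every name here (gmem_prep_state and its helpers) is invented for illustration and is not part of this patch:

```c
#include <linux/bitmap.h>
#include <linux/bitops.h>

/*
 * Hypothetical per-folio preparation state, sketching the commit
 * message's TODO: one bit per 4K subpage, so preparedness can be
 * tracked without overloading the folio-wide uptodate flag.
 * None of these names exist in the patch.
 */
struct gmem_prep_state {
	unsigned long *prepared;	/* one bit per 4K subpage */
	unsigned int nr_pages;		/* folio_nr_pages() at allocation */
};

static bool gmem_subpage_prepared(struct gmem_prep_state *st, unsigned int idx)
{
	return test_bit(idx, st->prepared);
}

static void gmem_mark_subpage_prepared(struct gmem_prep_state *st, unsigned int idx)
{
	set_bit(idx, st->prepared);
}

/* The analogue of folio_test_uptodate() under this scheme. */
static bool gmem_folio_fully_prepared(struct gmem_prep_state *st)
{
	return bitmap_full(st->prepared, st->nr_pages);
}
```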
