Skip to content

Commit 05b1014

Browse files
sean-jc authored and mdroth committed
KVM: Add hugepage support for dedicated guest memory
NOTE: upstream guest_memfd does not currently support 2MB pages, so the support added here is disabled by default. Use the kvm.gmem_2m_enabled=1 module parameter to switch it on.

TODO: Allow hugepages for kvm_gmem_populate() (carefully, to avoid EEXIST errors when misaligned)

Extended guest_memfd to allow backing guest memory with hugepages. This is done as a best-effort by default until a better-defined mechanism is put in place that can provide better control/assurances to userspace about hugepage allocations.

When reporting the max order when KVM gets a pfn from guest_memfd, force order-0 pages if the hugepage is not fully contained by the memslot binding, e.g. if userspace requested hugepages but punches a hole in the memslot bindings in order to emulate x86's VGA hole.

Link: https://lore.kernel.org/kvm/[email protected]/T/#mccbd3e8bf9897f0ddbf864e6318d6f2f208b269c
Signed-off-by: Sean Christopherson <[email protected]>
Message-Id: <[email protected]>
[Allow even with CONFIG_TRANSPARENT_HUGEPAGE; dropped momentarily due to uneasiness about the API. - Paolo]
Signed-off-by: Paolo Bonzini <[email protected]>
[mdr: based on discussion in the Link regarding the original patch, make the following set of changes:
- For now, don't introduce an opt-in flag to enable hugepage support. By default, just make a best-effort for PMD_ORDER allocations so that there are no false assurances to userspace that they'll get hugepages. Performance-wise, it's better at least than the current guarantee that they will get 4K pages every time. A more proper opt-in interface can then improve on things later.
- Pass GFP_NOWARN to alloc_pages() so failures are not disruptive to normal operations
- Drop size checks during creation time. Instead just avoid huge allocations if they extend beyond end of the memfd.
- Drop hugepage-related unit tests since everything is now handled transparently to userspace anyway.
- Update commit message accordingly.]
Signed-off-by: Michael Roth <[email protected]>
1 parent 66b8df6 commit 05b1014

File tree

3 files changed

+73
-8
lines changed

3 files changed

+73
-8
lines changed

include/linux/kvm_host.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2200,6 +2200,8 @@ extern unsigned int halt_poll_ns_grow;
22002200
extern unsigned int halt_poll_ns_grow_start;
22012201
extern unsigned int halt_poll_ns_shrink;
22022202

2203+
extern unsigned int gmem_2m_enabled;
2204+
22032205
struct kvm_device {
22042206
const struct kvm_device_ops *ops;
22052207
struct kvm *kvm;

virt/kvm/guest_memfd.c

Lines changed: 67 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,56 @@ static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct fol
4949
return 0;
5050
}
5151

52-
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
52+
/*
 * Best-effort allocation of a huge (order > 0) folio covering @index.
 *
 * Returns the newly allocated folio on success, or NULL on any failure so
 * the caller can fall back to an order-0 allocation.  Failure cases: the
 * aligned hugepage range would extend past i_size, some page in the range
 * is already present in the page cache, allocation fails, or insertion
 * into the mapping races with another insertion.
 */
static struct folio *kvm_gmem_get_huge_folio(struct inode *inode, pgoff_t index,
					     unsigned int order)
{
	struct address_space *mapping = inode->i_mapping;
	gfp_t gfp = mapping_gfp_mask(mapping) | __GFP_NOWARN;
	pgoff_t nr_pages = 1UL << order;
	pgoff_t aligned_index = round_down(index, nr_pages);
	struct folio *folio;

	/* Make sure the hugepage would be fully contained by the inode. */
	if ((aligned_index + nr_pages) * PAGE_SIZE > i_size_read(inode))
		return NULL;

	/* Punt if any page in the would-be hugepage range is already cached. */
	if (filemap_range_has_page(mapping, (loff_t)aligned_index << PAGE_SHIFT,
				   (loff_t)(aligned_index + nr_pages - 1) << PAGE_SHIFT))
		return NULL;

	/* __GFP_NOWARN: a failed huge allocation is expected and non-fatal. */
	folio = filemap_alloc_folio(gfp, order);
	if (!folio)
		return NULL;

	if (filemap_add_folio(mapping, folio, aligned_index, gfp)) {
		folio_put(folio);
		return NULL;
	}

	return folio;
}
81+
82+
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
83+
{
84+
struct folio *folio = NULL;
85+
86+
/*
87+
* TODO: modify kvm_gmem_populate() so that order can be limited based on
88+
* alignment and other restrictions, otherwise __filemap_get_folio() may
89+
* get called with a smaller granularity and return EEXIST due to
90+
* FGP_CREAT_ONLY being set. For now just limit FGP_CREAT_ONLY cases to 4K.
91+
* (TODO: with FGP_CREAT_ONLY gone, it may be possible to do this
92+
* unconditionally again.
93+
*/
94+
if (prepare && gmem_2m_enabled)
95+
folio = kvm_gmem_get_huge_folio(inode, index, PMD_ORDER);
96+
97+
if (!folio) {
98+
folio = filemap_grab_folio(inode->i_mapping, index);
99+
if (IS_ERR(folio))
100+
return folio;
101+
}
60102

61103
/*
62104
* Use the up-to-date flag to track whether or not the memory has been
@@ -420,6 +462,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
420462
inode->i_mode |= S_IFREG;
421463
inode->i_size = size;
422464
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
465+
mapping_set_large_folios(inode->i_mapping);
423466
mapping_set_unmovable(inode->i_mapping);
424467
/* Unmovable mappings are supposed to be marked unevictable as well. */
425468
WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
@@ -545,8 +588,8 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
545588
static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
546589
gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
547590
{
548-
pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
549591
struct kvm_gmem *gmem = file->private_data;
592+
pgoff_t index, huge_index;
550593
struct folio *folio;
551594
struct page *page;
552595
int r;
@@ -557,6 +600,7 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
557600
}
558601

559602
gmem = file->private_data;
603+
index = gfn - slot->base_gfn + slot->gmem.pgoff;
560604
if (xa_load(&gmem->bindings, index) != slot) {
561605
WARN_ON_ONCE(xa_load(&gmem->bindings, index));
562606
return -EIO;
@@ -574,9 +618,24 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
574618
page = folio_file_page(folio, index);
575619

576620
*pfn = page_to_pfn(page);
577-
if (max_order)
578-
*max_order = 0;
621+
if (!max_order)
622+
goto success;
623+
624+
*max_order = compound_order(compound_head(page));
625+
if (!*max_order)
626+
goto success;
579627

628+
/*
629+
* The folio can be mapped with a hugepage if and only if the folio is
630+
* fully contained by the range the memslot is bound to. Note, the
631+
* caller is responsible for handling gfn alignment, this only deals
632+
* with the file binding.
633+
*/
634+
huge_index = ALIGN(index, 1ull << *max_order);
635+
if (huge_index < ALIGN(slot->gmem.pgoff, 1ull << *max_order) ||
636+
huge_index + (1ull << *max_order) > slot->gmem.pgoff + slot->npages)
637+
*max_order = 0;
638+
success:
580639
r = 0;
581640

582641
out_unlock:

virt/kvm/kvm_main.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ unsigned int halt_poll_ns_shrink;
9696
module_param(halt_poll_ns_shrink, uint, 0644);
9797
EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
9898

99+
unsigned int gmem_2m_enabled;
100+
EXPORT_SYMBOL_GPL(gmem_2m_enabled);
101+
module_param(gmem_2m_enabled, uint, 0644);
102+
99103
/*
100104
* Ordering of locks:
101105
*

0 commit comments

Comments
 (0)