Commit 4a43efc

sean-jc and mdroth
authored and committed
KVM: Add hugepage support for dedicated guest memory
NOTE: upstream guest_memfd does not currently support 2MB pages, so the
support added here is disabled by default. Use the kvm.gmem_2m_enabled=1
module parameter to switch it on.

TODO: Allow hugepages for kvm_gmem_populate() (carefully, to avoid
EEXIST errors when misaligned)

Extended guest_memfd to allow backing guest memory with hugepages. This
is done as a best effort by default until a better-defined mechanism is
put in place that can provide better control/assurances to userspace
about hugepage allocations.

When reporting the max order as KVM gets a pfn from guest_memfd, force
order-0 pages if the hugepage is not fully contained by the memslot
binding, e.g. if userspace requested hugepages but punches a hole in the
memslot bindings in order to emulate x86's VGA hole.

Link: https://lore.kernel.org/kvm/[email protected]/T/#mccbd3e8bf9897f0ddbf864e6318d6f2f208b269c
Signed-off-by: Sean Christopherson <[email protected]>
Message-Id: <[email protected]>
[Allow even with CONFIG_TRANSPARENT_HUGEPAGE; dropped momentarily due to
 uneasiness about the API. - Paolo]
Signed-off-by: Paolo Bonzini <[email protected]>
[mdr: based on discussion in the Link regarding the original patch, make
 the following set of changes:
 - For now, don't introduce an opt-in flag to enable hugepage support.
   By default, just make a best effort for PMD_ORDER allocations so that
   there are no false assurances to userspace that they'll get
   hugepages. Performance-wise, it's at least better than the current
   guarantee that they will get 4K pages every time. A proper opt-in
   interface can then improve on things later.
 - Pass __GFP_NOWARN to the folio allocation so failures are not
   disruptive to normal operations.
 - Drop size checks at creation time. Instead, just avoid huge
   allocations if they would extend beyond the end of the memfd.
 - Drop hugepage-related unit tests since everything is now handled
   transparently to userspace anyway.
 - Update the commit message accordingly.]
Signed-off-by: Michael Roth <[email protected]>
1 parent 718de61 commit 4a43efc
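
For context, userspace obtains guest_memfd-backed memory via the
KVM_CREATE_GUEST_MEMFD ioctl, and nothing about that interface changes in
this commit. The sketch below is illustrative only (the create_gmem()
helper and GMEM_SIZE value are made up; error handling is trimmed): it
creates a gmem file whose size is a multiple of 2MB, which is what gives
the best-effort PMD_ORDER allocations added here a chance to succeed.
Since the new parameter is registered with mode 0644, it should also be
tunable at runtime via /sys/module/kvm/parameters/gmem_2m_enabled.

/*
 * Illustrative userspace sketch, not part of this commit: create a
 * guest_memfd sized as a multiple of 2MB so whole PMD_ORDER folios can
 * back it. Assumes vm_fd came from KVM_CREATE_VM.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define GMEM_SIZE	(512UL << 20)	/* 512MB, 2MB-aligned */

int create_gmem(int vm_fd)
{
	struct kvm_create_guest_memfd gmem = {
		.size  = GMEM_SIZE,
		.flags = 0,
	};
	int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);

	if (gmem_fd < 0)
		perror("KVM_CREATE_GUEST_MEMFD");
	return gmem_fd;
}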

File tree

3 files changed: +64 -4 lines changed

include/linux/kvm_host.h

Lines changed: 2 additions & 0 deletions

@@ -2211,6 +2211,8 @@ extern unsigned int halt_poll_ns_grow;
 extern unsigned int halt_poll_ns_grow_start;
 extern unsigned int halt_poll_ns_shrink;
 
+extern unsigned int gmem_2m_enabled;
+
 struct kvm_device {
 	const struct kvm_device_ops *ops;
 	struct kvm *kvm;

virt/kvm/guest_memfd.c

Lines changed: 58 additions & 4 deletions
@@ -87,6 +87,36 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
 	return r;
 }
 
+static struct folio *kvm_gmem_get_huge_folio(struct inode *inode, pgoff_t index,
+					     unsigned int order)
+{
+	pgoff_t npages = 1UL << order;
+	pgoff_t huge_index = round_down(index, npages);
+	struct address_space *mapping = inode->i_mapping;
+	gfp_t gfp = mapping_gfp_mask(mapping) | __GFP_NOWARN;
+	loff_t size = i_size_read(inode);
+	struct folio *folio;
+
+	/* Make sure hugepages would be fully-contained by inode */
+	if ((huge_index + npages) * PAGE_SIZE > size)
+		return NULL;
+
+	if (filemap_range_has_page(mapping, (loff_t)huge_index << PAGE_SHIFT,
+				   (loff_t)(huge_index + npages - 1) << PAGE_SHIFT))
+		return NULL;
+
+	folio = filemap_alloc_folio(gfp, order);
+	if (!folio)
+		return NULL;
+
+	if (filemap_add_folio(mapping, folio, huge_index, gfp)) {
+		folio_put(folio);
+		return NULL;
+	}
+
+	return folio;
+}
+
 /*
  * Returns a locked folio on success. The caller is responsible for
  * setting the up-to-date flag before the memory is mapped into the guest.
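
To make the "fully contained by inode" check above concrete, here is a
small standalone illustration (hypothetical values; PAGE_SIZE, PMD_ORDER,
and round_down are local stand-ins for the kernel definitions): a
PMD_ORDER folio is only attempted when the 2MB-aligned block containing
the requested index ends at or before i_size.

/*
 * Standalone illustration of the containment check in
 * kvm_gmem_get_huge_folio(); constants and helper are local stand-ins.
 */
#include <assert.h>

#define PAGE_SIZE	4096UL
#define PMD_ORDER	9	/* 2MB on x86-64 with 4K base pages */
#define round_down(x, y)	((x) & ~((y) - 1))

static int huge_fits(unsigned long index, unsigned long size)
{
	unsigned long npages = 1UL << PMD_ORDER;	/* 512 pages */
	unsigned long huge_index = round_down(index, npages);

	return (huge_index + npages) * PAGE_SIZE <= size;
}

int main(void)
{
	assert(huge_fits(0, 2UL << 20));	/* block [0, 512) fits a 2MB file */
	assert(!huge_fits(512, 2UL << 20));	/* block [512, 1024) is past EOF */
	return 0;
}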
@@ -98,8 +128,15 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
  */
 static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
 {
-	/* TODO: Support huge pages. */
-	return filemap_grab_folio(inode->i_mapping, index);
+	struct folio *folio = NULL;
+
+	if (gmem_2m_enabled)
+		folio = kvm_gmem_get_huge_folio(inode, index, PMD_ORDER);
+
+	if (!folio)
+		folio = filemap_grab_folio(inode->i_mapping, index);
+
+	return folio;
 }
 
 static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
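
Note that PMD_ORDER is PMD_SHIFT - PAGE_SHIFT, i.e. 9 on x86-64 with 4K
base pages, so a successful kvm_gmem_get_huge_folio() hands back a
512-page (2MB) folio. On any failure the function silently falls back to
the order-0 filemap_grab_folio() path, which is the "best effort" from
the commit message: a single memfd can end up backed by a mix of 2MB and
4K folios without userspace noticing.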
@@ -429,6 +466,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 	inode->i_size = size;
 	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
 	mapping_set_inaccessible(inode->i_mapping);
+	mapping_set_large_folios(inode->i_mapping);
 	/* Unmovable mappings are supposed to be marked unevictable as well. */
 	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
 
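mapping_set_large_folios() flags the inode's address_space as supporting
folios larger than order 0; without it, the filemap_add_folio() call in
kvm_gmem_get_huge_folio() could not insert a PMD_ORDER folio into the
page cache.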

@@ -556,8 +594,8 @@ __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
 		  gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared,
 		  int *max_order)
 {
-	pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
 	struct kvm_gmem *gmem = file->private_data;
+	pgoff_t index, huge_index;
 	struct folio *folio;
 
 	if (file != slot->gmem.file) {
@@ -566,6 +604,7 @@ __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
 	}
 
 	gmem = file->private_data;
+	index = gfn - slot->base_gfn + slot->gmem.pgoff;
 	if (xa_load(&gmem->bindings, index) != slot) {
 		WARN_ON_ONCE(xa_load(&gmem->bindings, index));
 		return ERR_PTR(-EIO);
@@ -582,7 +621,22 @@ __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
 	}
 
 	*pfn = folio_file_pfn(folio, index);
-	if (max_order)
+	if (!max_order)
+		goto success;
+
+	*max_order = folio_order(folio);
+	if (!*max_order)
+		goto success;
+
+	/*
+	 * The folio can be mapped with a hugepage if and only if the folio is
+	 * fully contained by the range the memslot is bound to. Note, the
+	 * caller is responsible for handling gfn alignment, this only deals
+	 * with the file binding.
+	 */
+	huge_index = ALIGN(index, 1ull << *max_order);
+	if (huge_index < ALIGN(slot->gmem.pgoff, 1ull << *max_order) ||
+	    huge_index + (1ull << *max_order) > slot->gmem.pgoff + slot->npages)
 		*max_order = 0;
-
+success:
 	*is_prepared = folio_test_uptodate(folio);
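
The commit message's VGA-hole scenario makes this binding check easier to
follow. The standalone sketch below mirrors the kernel logic with local
stand-ins (the ALIGN macro, hugepage_contained() helper, and page offsets
are all made up for the example): a memslot bound to only a slice of the
memfd forces max_order back to 0 even when a 2MB folio exists.

/*
 * Standalone illustration of the memslot-binding check above.
 */
#include <assert.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/*
 * Returns nonzero if an order-'order' block at 'index' lies entirely
 * inside the binding [pgoff, pgoff + npages).
 */
static int hugepage_contained(unsigned long index, unsigned int order,
			      unsigned long pgoff, unsigned long npages)
{
	unsigned long nr = 1UL << order;
	unsigned long huge_index = ALIGN(index, nr);

	return !(huge_index < ALIGN(pgoff, nr) ||
		 huge_index + nr > pgoff + npages);
}

int main(void)
{
	/* 1GB memfd fully bound to one slot: 2MB mappings are allowed. */
	assert(hugepage_contained(512, 9, 0, 262144));
	/* Slot bound to pages [0xa0, 0xc0) only, a VGA-hole-style carve
	 * out: no 512-page block fits, so the caller gets order 0. */
	assert(!hugepage_contained(0xa0, 9, 0xa0, 0x20));
	return 0;
}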

virt/kvm/kvm_main.c

Lines changed: 4 additions & 0 deletions

@@ -94,6 +94,10 @@ unsigned int halt_poll_ns_shrink = 2;
 module_param(halt_poll_ns_shrink, uint, 0644);
 EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
 
+unsigned int gmem_2m_enabled;
+EXPORT_SYMBOL_GPL(gmem_2m_enabled);
+module_param(gmem_2m_enabled, uint, 0644);
+
 /*
  * Ordering of locks:
  *
