4 changes: 4 additions & 0 deletions include/asm-generic/tlb.h
@@ -222,12 +222,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 #define tlb_needs_table_invalidate() (true)
 #endif
 
+void tlb_remove_table_sync_one(void);
+
 #else
 
 #ifdef tlb_needs_table_invalidate
 #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
 #endif
 
+static inline void tlb_remove_table_sync_one(void) { }
+
 #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
 
 
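The tlb.h hunk pairs a real declaration (when CONFIG_MMU_GATHER_RCU_TABLE_FREE is enabled) with an empty static-inline stub (when it is not), so callers such as huge_pmd_unshare() below can call tlb_remove_table_sync_one() unconditionally. A minimal stand-alone sketch of that compile-out pattern, using made-up names (HAVE_RCU_TABLE_FREE, table_sync_one) rather than the kernel's:

#include <stdio.h>

/* Stand-in for the CONFIG_MMU_GATHER_RCU_TABLE_FREE switch; left undefined here. */
/* #define HAVE_RCU_TABLE_FREE 1 */

#ifdef HAVE_RCU_TABLE_FREE
void table_sync_one(void);                    /* real version, defined in another .c file */
#else
static inline void table_sync_one(void) { }  /* feature compiled out: calls become no-ops */
#endif

int main(void)
{
	table_sync_one();                     /* call sites need no #ifdef of their own */
	puts("call site builds the same way with or without the feature");
	return 0;
}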
3 changes: 3 additions & 0 deletions include/linux/hugetlb.h
@@ -215,6 +215,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 
 bool is_hugetlb_entry_migration(pte_t pte);
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
 
 #else /* !CONFIG_HUGETLB_PAGE */
 
@@ -420,6 +421,8 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
 
 static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
 
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 /*
  * hugepages at page global directory. If arch support
3 changes: 3 additions & 0 deletions include/linux/mm.h
@@ -2436,6 +2436,9 @@ static inline bool pgtable_pmd_page_ctor(struct page *page)
 	if (!pmd_ptlock_init(page))
 		return false;
 	__SetPageTable(page);
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+	atomic_set(&page->pt_share_count, 0);
+#endif
 	inc_lruvec_page_state(page, NR_PAGETABLE);
 	return true;
 }
3 changes: 3 additions & 0 deletions include/linux/mm_types.h
@@ -155,6 +155,9 @@ struct page {
 			union {
 				struct mm_struct *pt_mm; /* x86 pgds only */
 				atomic_t pt_frag_refcount; /* powerpc */
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+				RH_KABI_BROKEN_INSERT(atomic_t pt_share_count)
+#endif
 			};
 #if ALLOC_SPLIT_PTLOCKS
 			spinlock_t *ptl;
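Together, the mm.h and mm_types.h hunks switch shared hugetlb PMD page tables from page-refcount tracking to a dedicated pt_share_count: pgtable_pmd_page_ctor() starts it at 0, huge_pmd_share() increments it for each additional mm that attaches to an existing PMD table, and huge_pmd_unshare() treats 0 as "not shared". A stand-alone userspace sketch of that counting scheme (the struct and helper names below are illustrative, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>

/* Toy stand-in for a PMD page-table page; only the share count matters here. */
struct pmd_table {
	atomic_int pt_share_count;   /* number of *additional* mms sharing this table */
};

static void table_ctor(struct pmd_table *pt)
{
	atomic_init(&pt->pt_share_count, 0);        /* freshly allocated: not shared */
}

static void share(struct pmd_table *pt)
{
	atomic_fetch_add(&pt->pt_share_count, 1);   /* another mm attached to it */
}

/* Mirrors the huge_pmd_unshare() decision: 0 means "not shared, nothing to do". */
static int unshare(struct pmd_table *pt)
{
	if (!atomic_load(&pt->pt_share_count))
		return 0;                           /* sole owner frees it on the normal path */
	atomic_fetch_sub(&pt->pt_share_count, 1);
	return 1;                                   /* dropped one sharer */
}

int main(void)
{
	struct pmd_table pt;

	table_ctor(&pt);
	share(&pt);                                 /* a second process shares the table */
	printf("after share:    count=%d\n", atomic_load(&pt.pt_share_count));
	printf("first unshare:  %d\n", unshare(&pt));   /* 1: a sharer detached */
	printf("second unshare: %d\n", unshare(&pt));   /* 0: not shared any more */
	return 0;
}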
123 changes: 86 additions & 37 deletions mm/hugetlb.c
@@ -95,6 +95,8 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
 
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta);
+static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end, bool take_locks);
 
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
@@ -4631,6 +4633,39 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
 	return 0;
 }
 
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	/*
+	 * PMD sharing is only possible for PUD_SIZE-aligned address ranges
+	 * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
+	 * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+	 * This function is called in the middle of a VMA split operation, with
+	 * MM, VMA and rmap all write-locked to prevent concurrent page table
+	 * walks (except hardware and gup_fast()).
+	 */
+	mmap_assert_write_locked(vma->vm_mm);
+	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
+	if (addr & ~PUD_MASK) {
+		unsigned long floor = addr & PUD_MASK;
+		unsigned long ceil = floor + PUD_SIZE;
+
+		if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+			/*
+			 * Locking:
+			 * Use take_locks=false here.
+			 * The file rmap lock is already held.
+			 * The hugetlb VMA lock can't be taken when we already
+			 * hold the file rmap lock, and we don't need it because
+			 * its purpose is to synchronize against concurrent page
+			 * table walks, which are not possible thanks to the
+			 * locks held by our caller.
+			 */
+			hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+		}
+	}
+}
+
 static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
 {
 	return huge_page_size(hstate_vma(vma));
@@ -4734,7 +4769,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			    struct vm_area_struct *dst_vma,
 			    struct vm_area_struct *src_vma)
 {
-	pte_t *src_pte, *dst_pte, entry, dst_entry;
+	pte_t *src_pte, *dst_pte, entry;
 	struct page *ptepage;
 	unsigned long addr;
 	bool cow = is_cow_mapping(src_vma->vm_flags);
@@ -4773,30 +4808,20 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			break;
 		}
 
-		/*
-		 * If the pagetables are shared don't copy or take references.
-		 * dst_pte == src_pte is the common case of src/dest sharing.
-		 *
-		 * However, src could have 'unshared' and dst shares with
-		 * another vma. If dst_pte !none, this implies sharing.
-		 * Check here before taking page table lock, and once again
-		 * after taking the lock below.
-		 */
-		dst_entry = huge_ptep_get(dst_pte);
-		if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+		/* If the pagetables are shared, there is nothing to do */
+		if (!!atomic_read(&virt_to_page(dst_pte)->pt_share_count))
 			continue;
+#endif
 
 		dst_ptl = huge_pte_lock(h, dst, dst_pte);
 		src_ptl = huge_pte_lockptr(h, src, src_pte);
 		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		entry = huge_ptep_get(src_pte);
-		dst_entry = huge_ptep_get(dst_pte);
 again:
-		if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+		if (huge_pte_none(entry)) {
 			/*
-			 * Skip if src entry none. Also, skip in the
-			 * unlikely case dst entry !none as this implies
-			 * sharing with another vma.
+			 * Skip if src entry none.
 			 */
 			;
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
Expand Down Expand Up @@ -4876,7 +4901,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
restore_reserve_on_error(h, dst_vma, addr,
new);
put_page(new);
/* dst_entry won't change as in child */
/* huge_ptep of dst_pte won't change as in child */
goto again;
}
hugetlb_install_page(dst_vma, dst_pte, addr, new);
@@ -6697,7 +6722,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 			spte = huge_pte_offset(svma->vm_mm, saddr,
 					       vma_mmu_pagesize(svma));
 			if (spte) {
-				get_page(virt_to_page(spte));
+				atomic_inc(&virt_to_page(spte)->pt_share_count);
 				break;
 			}
 		}
@@ -6712,7 +6737,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 				(pmd_t *)((unsigned long)spte & PAGE_MASK));
 		mm_inc_nr_pmds(mm);
 	} else {
-		put_page(virt_to_page(spte));
+		atomic_dec(&virt_to_page(spte)->pt_share_count);
 	}
 	spin_unlock(ptl);
 out:
@@ -6723,29 +6748,34 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 /*
  * unmap huge page backed by shared pte.
  *
- * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
- * indicated by page_count > 1, unmap is achieved by clearing pud and
- * decrementing the ref count. If count == 1, the pte page is not shared.
- *
- * Called with page table lock held and i_mmap_rwsem held in write mode.
+ * Called with page table lock held.
  *
  * returns: 1 successfully unmapped a shared pte page
  *	    0 the underlying pte page is not shared, or it is the last user
  */
 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
 					unsigned long *addr, pte_t *ptep)
 {
+	unsigned long sz = huge_page_size(hstate_vma(vma));
 	pgd_t *pgd = pgd_offset(mm, *addr);
 	p4d_t *p4d = p4d_offset(pgd, *addr);
 	pud_t *pud = pud_offset(p4d, *addr);
 
 	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
-	BUG_ON(page_count(virt_to_page(ptep)) == 0);
-	if (page_count(virt_to_page(ptep)) == 1)
+	if (sz != PMD_SIZE)
+		return 0;
+	if (!atomic_read(&virt_to_page(ptep)->pt_share_count))
 		return 0;
 
 	pud_clear(pud);
-	put_page(virt_to_page(ptep));
+	/*
+	 * Once our caller drops the rmap lock, some other process might be
+	 * using this page table as a normal, non-hugetlb page table.
+	 * Wait for pending gup_fast() in other threads to finish before letting
+	 * that happen.
+	 */
+	tlb_remove_table_sync_one();
+	atomic_dec(&virt_to_page(ptep)->pt_share_count);
 	mm_dec_nr_pmds(mm);
 	/*
 	 * This update of passed address optimizes loops sequentially
@@ -7037,7 +7067,27 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
 }
 
 /*
- * This function will unconditionally remove all the shared pmd pgtable entries
- * within the specific vma for a hugetlbfs memory range.
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
  */
-void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+				unsigned long start,
+				unsigned long end,
+				bool take_locks)
 {
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
 	struct mm_struct *mm = vma->vm_mm;
 	struct mmu_notifier_range range;
-	unsigned long address, start, end;
+	unsigned long address;
 	spinlock_t *ptl;
 	pte_t *ptep;
 
 	if (!(vma->vm_flags & VM_MAYSHARE))
 		return;
 
-	start = ALIGN(vma->vm_start, PUD_SIZE);
-	end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
-
 	if (start >= end)
 		return;
 
@@ -7067,7 +7099,11 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
 				start, end);
 	mmu_notifier_invalidate_range_start(&range);
-	i_mmap_lock_write(vma->vm_file->f_mapping);
+	if (take_locks) {
+		i_mmap_lock_write(vma->vm_file->f_mapping);
+	} else {
+		i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+	}
 	for (address = start; address < end; address += PUD_SIZE) {
 		unsigned long tmp = address;
 
@@ -7080,14 +7116,27 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
 		spin_unlock(ptl);
 	}
 	flush_hugetlb_tlb_range(vma, start, end);
-	i_mmap_unlock_write(vma->vm_file->f_mapping);
+	if (take_locks) {
+		i_mmap_unlock_write(vma->vm_file->f_mapping);
+	}
 	/*
 	 * No need to call mmu_notifier_invalidate_range(), see
 	 * Documentation/vm/mmu_notifier.rst.
 	 */
 	mmu_notifier_invalidate_range_end(&range);
 }
 
+/*
+ * This function will unconditionally remove all the shared pmd pgtable entries
+ * within the specific vma for a hugetlbfs memory range.
+ */
+void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+{
+	hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
+			ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+			/* take_locks = */ true);
+}
+
 #ifdef CONFIG_CMA
 static bool cma_reserve_called __initdata;
 
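The PUD-alignment arithmetic that hugetlb_split() relies on is easiest to see with concrete numbers: PMD sharing only covers PUD_SIZE-aligned, PUD_SIZE-long intervals, so a split at an unaligned address must first unshare the interval containing the split point. A small userspace sketch of that calculation (assuming the x86-64 1 GiB PUD_SIZE; the VMA bounds below are made-up examples):

#include <stdio.h>

#define PUD_SHIFT 30                          /* 1 GiB PUD regions, as on x86-64 */
#define PUD_SIZE  (1UL << PUD_SHIFT)
#define PUD_MASK  (~(PUD_SIZE - 1))

/* Prints the [floor, ceil) interval that would be unshared for a split at addr, if any. */
static void split_point(unsigned long vm_start, unsigned long vm_end,
			unsigned long addr)
{
	if (addr & ~PUD_MASK) {                       /* split point is not PUD-aligned */
		unsigned long floor = addr & PUD_MASK;
		unsigned long ceil  = floor + PUD_SIZE;

		if (floor >= vm_start && ceil <= vm_end) {
			printf("split at %#lx: unshare [%#lx, %#lx)\n",
			       addr, floor, ceil);
			return;
		}
	}
	printf("split at %#lx: nothing to unshare\n", addr);
}

int main(void)
{
	unsigned long vm_start = 0x40000000UL;        /* 1 GiB */
	unsigned long vm_end   = 0x140000000UL;       /* 5 GiB */

	split_point(vm_start, vm_end, 0x80000000UL);  /* aligned: no unsharing needed */
	split_point(vm_start, vm_end, 0xa0000000UL);  /* unaligned: unshare [2 GiB, 3 GiB) */
	return 0;
}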
8 changes: 8 additions & 0 deletions mm/mmap.c
@@ -851,7 +851,15 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 		}
 	}
 again:
+	/*
+	 * Get rid of huge pages and shared page tables straddling the split
+	 * boundary.
+	 */
 	vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
+	if (is_vm_hugetlb_page(orig_vma)) {
+		hugetlb_split(orig_vma, start);
+		hugetlb_split(orig_vma, end);
+	}
 
 	if (file) {
 		mapping = file->f_mapping;
4 changes: 1 addition & 3 deletions mm/mmu_gather.c
@@ -140,7 +140,7 @@ static void tlb_remove_table_smp_sync(void *arg)
 	/* Simply deliver the interrupt */
 }
 
-static void tlb_remove_table_sync_one(void)
+void tlb_remove_table_sync_one(void)
 {
 	/*
 	 * This isn't an RCU grace period and hence the page-tables cannot be
@@ -164,8 +164,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
 
 #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
 
-static void tlb_remove_table_sync_one(void) { }
-
 static void tlb_remove_table_free(struct mmu_table_batch *batch)
 {
 	__tlb_remove_table_free(batch);