
Commit d8ea7cc

zokeefe authored and akpm00 committed
mm/khugepaged: add flag to predicate khugepaged-only behavior
Add .is_khugepaged flag to struct collapse_control so khugepaged-specific
behavior can be elided by MADV_COLLAPSE context.

Start by protecting khugepaged-specific heuristics by this flag. In
MADV_COLLAPSE, the user presumably has reason to believe the collapse will
be beneficial and khugepaged heuristics shouldn't prevent the user from
doing so:

1) sysfs-controlled knobs khugepaged_max_ptes_[none|swap|shared]

2) requirement that some pages in region being collapsed be young or
   referenced

[[email protected]: consistently order cc->is_khugepaged and pte_* checks]
Link: https://lkml.kernel.org/r/[email protected]
Link: https://lore.kernel.org/linux-mm/[email protected]/
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Zach O'Keefe <[email protected]>
Reviewed-by: Yang Shi <[email protected]>
Cc: Alex Shi <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Axel Rasmussen <[email protected]>
Cc: Chris Kennelly <[email protected]>
Cc: Chris Zankel <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Helge Deller <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Ivan Kokshaysky <[email protected]>
Cc: James Bottomley <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Matt Turner <[email protected]>
Cc: Max Filippov <[email protected]>
Cc: Miaohe Lin <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Pasha Tatashin <[email protected]>
Cc: Pavel Begunkov <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Rongwei Wang <[email protected]>
Cc: SeongJae Park <[email protected]>
Cc: Song Liu <[email protected]>
Cc: Thomas Bogendoerfer <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Dan Carpenter <[email protected]>
Cc: "Souptick Joarder (HPE)" <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
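
As an illustration of the resulting control flow, here is a minimal, self-contained sketch (not the kernel code; all names such as collapse_control_demo, none_pte_limit_ok and max_ptes_none_demo are hypothetical) of how a per-context is_khugepaged flag lets a caller-initiated collapse bypass a daemon-only limit like khugepaged_max_ptes_none:

/*
 * Standalone sketch of the gating pattern this commit introduces.
 * A per-context flag decides whether a tunable heuristic limit applies:
 * khugepaged (background daemon) enforces it, a MADV_COLLAPSE-style
 * request does not. Names below are illustrative only.
 */
#include <stdbool.h>
#include <stdio.h>

struct collapse_control_demo {
	bool is_khugepaged;	/* true: daemon scan; false: user-requested collapse */
};

/* stand-in for the sysfs knob khugepaged_max_ptes_none */
static const unsigned int max_ptes_none_demo = 255;

/* The limit is only enforced when the scan was initiated by the daemon. */
static bool none_pte_limit_ok(const struct collapse_control_demo *cc,
			      unsigned int none_or_zero)
{
	return !cc->is_khugepaged || none_or_zero <= max_ptes_none_demo;
}

int main(void)
{
	struct collapse_control_demo daemon  = { .is_khugepaged = true };
	struct collapse_control_demo madvise = { .is_khugepaged = false };

	/* 300 empty/zero PTEs: the daemon skips, the user-requested path proceeds. */
	printf("daemon:  %s\n", none_pte_limit_ok(&daemon, 300) ? "collapse" : "skip");
	printf("madvise: %s\n", none_pte_limit_ok(&madvise, 300) ? "collapse" : "skip");
	return 0;
}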
1 parent 50ad2f2 commit d8ea7cc

File tree

1 file changed: mm/khugepaged.c (+58 -25 lines changed)


Diff for: mm/khugepaged.c

@@ -73,6 +73,8 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
  * default collapse hugepages if there is at least one pte mapped like
  * it would have happened if the vma was large enough during page
  * fault.
+ *
+ * Note that these are only respected if collapse was initiated by khugepaged.
  */
 static unsigned int khugepaged_max_ptes_none __read_mostly;
 static unsigned int khugepaged_max_ptes_swap __read_mostly;
@@ -86,6 +88,8 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
 #define MAX_PTE_MAPPED_THP 8
 
 struct collapse_control {
+	bool is_khugepaged;
+
 	/* Num pages scanned per node */
 	u32 node_load[MAX_NUMNODES];
 
@@ -554,6 +558,7 @@ static bool is_refcount_suitable(struct page *page)
 static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 					unsigned long address,
 					pte_t *pte,
+					struct collapse_control *cc,
 					struct list_head *compound_pagelist)
 {
 	struct page *page = NULL;
@@ -566,8 +571,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		pte_t pteval = *_pte;
 		if (pte_none(pteval) || (pte_present(pteval) &&
 				is_zero_pfn(pte_pfn(pteval)))) {
+			++none_or_zero;
 			if (!userfaultfd_armed(vma) &&
-			    ++none_or_zero <= khugepaged_max_ptes_none) {
+			    (!cc->is_khugepaged ||
+			     none_or_zero <= khugepaged_max_ptes_none)) {
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
@@ -587,11 +594,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 
 		VM_BUG_ON_PAGE(!PageAnon(page), page);
 
-		if (page_mapcount(page) > 1 &&
-				++shared > khugepaged_max_ptes_shared) {
-			result = SCAN_EXCEED_SHARED_PTE;
-			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
-			goto out;
+		if (page_mapcount(page) > 1) {
+			++shared;
+			if (cc->is_khugepaged &&
+			    shared > khugepaged_max_ptes_shared) {
+				result = SCAN_EXCEED_SHARED_PTE;
+				count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
+				goto out;
+			}
 		}
 
 		if (PageCompound(page)) {
@@ -654,10 +664,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		if (PageCompound(page))
 			list_add_tail(&page->lru, compound_pagelist);
 next:
-		/* There should be enough young pte to collapse the page */
-		if (pte_young(pteval) ||
-		    page_is_young(page) || PageReferenced(page) ||
-		    mmu_notifier_test_young(vma->vm_mm, address))
+		/*
+		 * If collapse was initiated by khugepaged, check that there is
+		 * enough young pte to justify collapsing the page
+		 */
+		if (cc->is_khugepaged &&
+		    (pte_young(pteval) || page_is_young(page) ||
+		     PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
+								     address)))
 			referenced++;
 
 		if (pte_write(pteval))
@@ -666,7 +680,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 
 	if (unlikely(!writable)) {
 		result = SCAN_PAGE_RO;
-	} else if (unlikely(!referenced)) {
+	} else if (unlikely(cc->is_khugepaged && !referenced)) {
 		result = SCAN_LACK_REFERENCED_PAGE;
 	} else {
 		result = SCAN_SUCCEED;
@@ -745,6 +759,7 @@ static void khugepaged_alloc_sleep(void)
 
 
 struct collapse_control khugepaged_collapse_control = {
+	.is_khugepaged = true,
 	.last_target_node = NUMA_NO_NODE,
 };
 
@@ -1025,7 +1040,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 	mmu_notifier_invalidate_range_end(&range);
 
 	spin_lock(pte_ptl);
-	result = __collapse_huge_page_isolate(vma, address, pte,
+	result = __collapse_huge_page_isolate(vma, address, pte, cc,
 					      &compound_pagelist);
 	spin_unlock(pte_ptl);
 
@@ -1116,7 +1131,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 	     _pte++, _address += PAGE_SIZE) {
 		pte_t pteval = *_pte;
 		if (is_swap_pte(pteval)) {
-			if (++unmapped <= khugepaged_max_ptes_swap) {
+			++unmapped;
+			if (!cc->is_khugepaged ||
+			    unmapped <= khugepaged_max_ptes_swap) {
 				/*
 				 * Always be strict with uffd-wp
 				 * enabled swap entries. Please see
@@ -1134,8 +1151,10 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 			}
 		}
 		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
+			++none_or_zero;
 			if (!userfaultfd_armed(vma) &&
-			    ++none_or_zero <= khugepaged_max_ptes_none) {
+			    (!cc->is_khugepaged ||
+			     none_or_zero <= khugepaged_max_ptes_none)) {
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
@@ -1165,11 +1184,14 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 			goto out_unmap;
 		}
 
-		if (page_mapcount(page) > 1 &&
-				++shared > khugepaged_max_ptes_shared) {
-			result = SCAN_EXCEED_SHARED_PTE;
-			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
-			goto out_unmap;
+		if (page_mapcount(page) > 1) {
+			++shared;
+			if (cc->is_khugepaged &&
+			    shared > khugepaged_max_ptes_shared) {
+				result = SCAN_EXCEED_SHARED_PTE;
+				count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
+				goto out_unmap;
+			}
 		}
 
 		page = compound_head(page);
@@ -1220,14 +1242,22 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 			result = SCAN_PAGE_COUNT;
 			goto out_unmap;
 		}
-		if (pte_young(pteval) ||
-		    page_is_young(page) || PageReferenced(page) ||
-		    mmu_notifier_test_young(vma->vm_mm, address))
+
+		/*
+		 * If collapse was initiated by khugepaged, check that there is
+		 * enough young pte to justify collapsing the page
+		 */
+		if (cc->is_khugepaged &&
+		    (pte_young(pteval) || page_is_young(page) ||
+		     PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
+								     address)))
 			referenced++;
 	}
 	if (!writable) {
 		result = SCAN_PAGE_RO;
-	} else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
+	} else if (cc->is_khugepaged &&
+		   (!referenced ||
+		    (unmapped && referenced < HPAGE_PMD_NR / 2))) {
 		result = SCAN_LACK_REFERENCED_PAGE;
 	} else {
 		result = SCAN_SUCCEED;
@@ -1896,7 +1926,9 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
 			continue;
 
 		if (xa_is_value(page)) {
-			if (++swap > khugepaged_max_ptes_swap) {
+			++swap;
+			if (cc->is_khugepaged &&
+			    swap > khugepaged_max_ptes_swap) {
 				result = SCAN_EXCEED_SWAP_PTE;
 				count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
 				break;
@@ -1947,7 +1979,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
 	rcu_read_unlock();
 
 	if (result == SCAN_SUCCEED) {
-		if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
+		if (cc->is_khugepaged &&
+		    present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
 			result = SCAN_EXCEED_NONE_PTE;
 			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 		} else {
