diff --git a/MAINTAINERS b/MAINTAINERS index 5cf6873569d3..72870562746b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11540,6 +11540,7 @@ F: mm/hugetlb.c F: mm/hugetlb_cgroup.c F: mm/hugetlb_cma.c F: mm/hugetlb_cma.h +F: mm/hugetlb_sysfs.c F: mm/hugetlb_vmemmap.c F: mm/hugetlb_vmemmap.h F: tools/testing/selftests/cgroup/test_hugetlb_memcg.c diff --git a/mm/Makefile b/mm/Makefile index 21abb3353550..b9edfce6c202 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -78,7 +78,7 @@ endif obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o obj-$(CONFIG_ZSWAP) += zswap.o obj-$(CONFIG_HAS_DMA) += dmapool.o -obj-$(CONFIG_HUGETLBFS) += hugetlb.o +obj-$(CONFIG_HUGETLBFS) += hugetlb.o hugetlb_sysfs.o ifdef CONFIG_CMA obj-$(CONFIG_HUGETLBFS) += hugetlb_cma.o endif diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ac5ce2b2b87d..26b2a319b002 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -46,13 +45,12 @@ #include #include -#include -#include #include #include #include "internal.h" #include "hugetlb_vmemmap.h" #include "hugetlb_cma.h" +#include "hugetlb_internal.h" #include int hugetlb_max_hstate __read_mostly; @@ -134,17 +132,6 @@ static void hugetlb_free_folio(struct folio *folio) folio_put(folio); } -/* - * Check if the hstate represents gigantic pages but gigantic page - * runtime support is not available. This is a common condition used to - * skip operations that cannot be performed on gigantic pages when runtime - * support is disabled. - */ -static inline bool hstate_is_gigantic_no_runtime(struct hstate *h) -{ - return hstate_is_gigantic(h) && !gigantic_page_runtime_supported(); -} - static inline bool subpool_is_free(struct hugepage_subpool *spool) { if (spool->count) @@ -1431,77 +1418,6 @@ static struct folio *dequeue_hugetlb_folio_vma(struct hstate *h, return NULL; } -/* - * common helper functions for hstate_next_node_to_{alloc|free}. - * We may have allocated or freed a huge page based on a different - * nodes_allowed previously, so h->next_node_to_{alloc|free} might - * be outside of *nodes_allowed. Ensure that we use an allowed - * node for alloc or free. - */ -static int next_node_allowed(int nid, nodemask_t *nodes_allowed) -{ - nid = next_node_in(nid, *nodes_allowed); - VM_BUG_ON(nid >= MAX_NUMNODES); - - return nid; -} - -static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed) -{ - if (!node_isset(nid, *nodes_allowed)) - nid = next_node_allowed(nid, nodes_allowed); - return nid; -} - -/* - * returns the previously saved node ["this node"] from which to - * allocate a persistent huge page for the pool and advance the - * next node from which to allocate, handling wrap at end of node - * mask. - */ -static int hstate_next_node_to_alloc(int *next_node, - nodemask_t *nodes_allowed) -{ - int nid; - - VM_BUG_ON(!nodes_allowed); - - nid = get_valid_node_allowed(*next_node, nodes_allowed); - *next_node = next_node_allowed(nid, nodes_allowed); - - return nid; -} - -/* - * helper for remove_pool_hugetlb_folio() - return the previously saved - * node ["this node"] from which to free a huge page. Advance the - * next node id whether or not we find a free huge page to free so - * that the next attempt to free addresses the next node. - */ -static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) -{ - int nid; - - VM_BUG_ON(!nodes_allowed); - - nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed); - h->next_nid_to_free = next_node_allowed(nid, nodes_allowed); - - return nid; -} - -#define for_each_node_mask_to_alloc(next_node, nr_nodes, node, mask) \ - for (nr_nodes = nodes_weight(*mask); \ - nr_nodes > 0 && \ - ((node = hstate_next_node_to_alloc(next_node, mask)) || 1); \ - nr_nodes--) - -#define for_each_node_mask_to_free(hs, nr_nodes, node, mask) \ - for (nr_nodes = nodes_weight(*mask); \ - nr_nodes > 0 && \ - ((node = hstate_next_node_to_free(hs, mask)) || 1); \ - nr_nodes--) - #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE #ifdef CONFIG_CONTIG_ALLOC static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, @@ -1557,8 +1473,8 @@ static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid, * * Must be called with hugetlb lock held. */ -static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, - bool adjust_surplus) +void remove_hugetlb_folio(struct hstate *h, struct folio *folio, + bool adjust_surplus) { int nid = folio_nid(folio); @@ -1593,8 +1509,8 @@ static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, h->nr_huge_pages_node[nid]--; } -static void add_hugetlb_folio(struct hstate *h, struct folio *folio, - bool adjust_surplus) +void add_hugetlb_folio(struct hstate *h, struct folio *folio, + bool adjust_surplus) { int nid = folio_nid(folio); @@ -1925,7 +1841,7 @@ static void account_new_hugetlb_folio(struct hstate *h, struct folio *folio) h->nr_huge_pages_node[folio_nid(folio)]++; } -static void init_new_hugetlb_folio(struct folio *folio) +void init_new_hugetlb_folio(struct folio *folio) { __folio_set_hugetlb(folio); INIT_LIST_HEAD(&folio->lru); @@ -2037,8 +1953,8 @@ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h, return folio; } -static void prep_and_add_allocated_folios(struct hstate *h, - struct list_head *folio_list) +void prep_and_add_allocated_folios(struct hstate *h, + struct list_head *folio_list) { unsigned long flags; struct folio *folio, *tmp_f; @@ -4093,8 +4009,8 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst, return rc; } -static long demote_pool_huge_page(struct hstate *src, nodemask_t *nodes_allowed, - unsigned long nr_to_demote) +long demote_pool_huge_page(struct hstate *src, nodemask_t *nodes_allowed, + unsigned long nr_to_demote) __must_hold(&hugetlb_lock) { int nr_nodes, node; @@ -4162,51 +4078,7 @@ static long demote_pool_huge_page(struct hstate *src, nodemask_t *nodes_allowed, return -EBUSY; } -#define HSTATE_ATTR_RO(_name) \ - static struct kobj_attribute _name##_attr = __ATTR_RO(_name) - -#define HSTATE_ATTR_WO(_name) \ - static struct kobj_attribute _name##_attr = __ATTR_WO(_name) - -#define HSTATE_ATTR(_name) \ - static struct kobj_attribute _name##_attr = __ATTR_RW(_name) - -static struct kobject *hugepages_kobj; -static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; - -static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp); - -static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp) -{ - int i; - - for (i = 0; i < HUGE_MAX_HSTATE; i++) - if (hstate_kobjs[i] == kobj) { - if (nidp) - *nidp = NUMA_NO_NODE; - return &hstates[i]; - } - - return kobj_to_node_hstate(kobj, nidp); -} - -static ssize_t nr_hugepages_show_common(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h; - unsigned long nr_huge_pages; - int nid; - - h = kobj_to_hstate(kobj, &nid); - if (nid == NUMA_NO_NODE) - nr_huge_pages = h->nr_huge_pages; - else - nr_huge_pages = h->nr_huge_pages_node[nid]; - - return sysfs_emit(buf, "%lu\n", nr_huge_pages); -} - -static ssize_t __nr_hugepages_store_common(bool obey_mempolicy, +ssize_t __nr_hugepages_store_common(bool obey_mempolicy, struct hstate *h, int nid, unsigned long count, size_t len) { @@ -4239,452 +4111,6 @@ static ssize_t __nr_hugepages_store_common(bool obey_mempolicy, return err ? err : len; } -static ssize_t nr_hugepages_store_common(bool obey_mempolicy, - struct kobject *kobj, const char *buf, - size_t len) -{ - struct hstate *h; - unsigned long count; - int nid; - int err; - - err = kstrtoul(buf, 10, &count); - if (err) - return err; - - h = kobj_to_hstate(kobj, &nid); - return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len); -} - -static ssize_t nr_hugepages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return nr_hugepages_show_common(kobj, attr, buf); -} - -static ssize_t nr_hugepages_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t len) -{ - return nr_hugepages_store_common(false, kobj, buf, len); -} -HSTATE_ATTR(nr_hugepages); - -#ifdef CONFIG_NUMA - -/* - * hstate attribute for optionally mempolicy-based constraint on persistent - * huge page alloc/free. - */ -static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - return nr_hugepages_show_common(kobj, attr, buf); -} - -static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t len) -{ - return nr_hugepages_store_common(true, kobj, buf, len); -} -HSTATE_ATTR(nr_hugepages_mempolicy); -#endif - - -static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h = kobj_to_hstate(kobj, NULL); - return sysfs_emit(buf, "%lu\n", h->nr_overcommit_huge_pages); -} - -static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - int err; - unsigned long input; - struct hstate *h = kobj_to_hstate(kobj, NULL); - - if (hstate_is_gigantic_no_runtime(h)) - return -EINVAL; - - err = kstrtoul(buf, 10, &input); - if (err) - return err; - - spin_lock_irq(&hugetlb_lock); - h->nr_overcommit_huge_pages = input; - spin_unlock_irq(&hugetlb_lock); - - return count; -} -HSTATE_ATTR(nr_overcommit_hugepages); - -static ssize_t free_hugepages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h; - unsigned long free_huge_pages; - int nid; - - h = kobj_to_hstate(kobj, &nid); - if (nid == NUMA_NO_NODE) - free_huge_pages = h->free_huge_pages; - else - free_huge_pages = h->free_huge_pages_node[nid]; - - return sysfs_emit(buf, "%lu\n", free_huge_pages); -} -HSTATE_ATTR_RO(free_hugepages); - -static ssize_t resv_hugepages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h = kobj_to_hstate(kobj, NULL); - return sysfs_emit(buf, "%lu\n", h->resv_huge_pages); -} -HSTATE_ATTR_RO(resv_hugepages); - -static ssize_t surplus_hugepages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h; - unsigned long surplus_huge_pages; - int nid; - - h = kobj_to_hstate(kobj, &nid); - if (nid == NUMA_NO_NODE) - surplus_huge_pages = h->surplus_huge_pages; - else - surplus_huge_pages = h->surplus_huge_pages_node[nid]; - - return sysfs_emit(buf, "%lu\n", surplus_huge_pages); -} -HSTATE_ATTR_RO(surplus_hugepages); - -static ssize_t demote_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t len) -{ - unsigned long nr_demote; - unsigned long nr_available; - nodemask_t nodes_allowed, *n_mask; - struct hstate *h; - int err; - int nid; - - err = kstrtoul(buf, 10, &nr_demote); - if (err) - return err; - h = kobj_to_hstate(kobj, &nid); - - if (nid != NUMA_NO_NODE) { - init_nodemask_of_node(&nodes_allowed, nid); - n_mask = &nodes_allowed; - } else { - n_mask = &node_states[N_MEMORY]; - } - - /* Synchronize with other sysfs operations modifying huge pages */ - mutex_lock(&h->resize_lock); - spin_lock_irq(&hugetlb_lock); - - while (nr_demote) { - long rc; - - /* - * Check for available pages to demote each time thorough the - * loop as demote_pool_huge_page will drop hugetlb_lock. - */ - if (nid != NUMA_NO_NODE) - nr_available = h->free_huge_pages_node[nid]; - else - nr_available = h->free_huge_pages; - nr_available -= h->resv_huge_pages; - if (!nr_available) - break; - - rc = demote_pool_huge_page(h, n_mask, nr_demote); - if (rc < 0) { - err = rc; - break; - } - - nr_demote -= rc; - } - - spin_unlock_irq(&hugetlb_lock); - mutex_unlock(&h->resize_lock); - - if (err) - return err; - return len; -} -HSTATE_ATTR_WO(demote); - -static ssize_t demote_size_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h = kobj_to_hstate(kobj, NULL); - unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K; - - return sysfs_emit(buf, "%lukB\n", demote_size); -} - -static ssize_t demote_size_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct hstate *h, *demote_hstate; - unsigned long demote_size; - unsigned int demote_order; - - demote_size = (unsigned long)memparse(buf, NULL); - - demote_hstate = size_to_hstate(demote_size); - if (!demote_hstate) - return -EINVAL; - demote_order = demote_hstate->order; - if (demote_order < HUGETLB_PAGE_ORDER) - return -EINVAL; - - /* demote order must be smaller than hstate order */ - h = kobj_to_hstate(kobj, NULL); - if (demote_order >= h->order) - return -EINVAL; - - /* resize_lock synchronizes access to demote size and writes */ - mutex_lock(&h->resize_lock); - h->demote_order = demote_order; - mutex_unlock(&h->resize_lock); - - return count; -} -HSTATE_ATTR(demote_size); - -static struct attribute *hstate_attrs[] = { - &nr_hugepages_attr.attr, - &nr_overcommit_hugepages_attr.attr, - &free_hugepages_attr.attr, - &resv_hugepages_attr.attr, - &surplus_hugepages_attr.attr, -#ifdef CONFIG_NUMA - &nr_hugepages_mempolicy_attr.attr, -#endif - NULL, -}; - -static const struct attribute_group hstate_attr_group = { - .attrs = hstate_attrs, -}; - -static struct attribute *hstate_demote_attrs[] = { - &demote_size_attr.attr, - &demote_attr.attr, - NULL, -}; - -static const struct attribute_group hstate_demote_attr_group = { - .attrs = hstate_demote_attrs, -}; - -static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, - struct kobject **hstate_kobjs, - const struct attribute_group *hstate_attr_group) -{ - int retval; - int hi = hstate_index(h); - - hstate_kobjs[hi] = kobject_create_and_add(h->name, parent); - if (!hstate_kobjs[hi]) - return -ENOMEM; - - retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group); - if (retval) { - kobject_put(hstate_kobjs[hi]); - hstate_kobjs[hi] = NULL; - return retval; - } - - if (h->demote_order) { - retval = sysfs_create_group(hstate_kobjs[hi], - &hstate_demote_attr_group); - if (retval) { - pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name); - sysfs_remove_group(hstate_kobjs[hi], hstate_attr_group); - kobject_put(hstate_kobjs[hi]); - hstate_kobjs[hi] = NULL; - return retval; - } - } - - return 0; -} - -#ifdef CONFIG_NUMA -static bool hugetlb_sysfs_initialized __ro_after_init; - -/* - * node_hstate/s - associate per node hstate attributes, via their kobjects, - * with node devices in node_devices[] using a parallel array. The array - * index of a node device or _hstate == node id. - * This is here to avoid any static dependency of the node device driver, in - * the base kernel, on the hugetlb module. - */ -struct node_hstate { - struct kobject *hugepages_kobj; - struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; -}; -static struct node_hstate node_hstates[MAX_NUMNODES]; - -/* - * A subset of global hstate attributes for node devices - */ -static struct attribute *per_node_hstate_attrs[] = { - &nr_hugepages_attr.attr, - &free_hugepages_attr.attr, - &surplus_hugepages_attr.attr, - NULL, -}; - -static const struct attribute_group per_node_hstate_attr_group = { - .attrs = per_node_hstate_attrs, -}; - -/* - * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj. - * Returns node id via non-NULL nidp. - */ -static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp) -{ - int nid; - - for (nid = 0; nid < nr_node_ids; nid++) { - struct node_hstate *nhs = &node_hstates[nid]; - int i; - for (i = 0; i < HUGE_MAX_HSTATE; i++) - if (nhs->hstate_kobjs[i] == kobj) { - if (nidp) - *nidp = nid; - return &hstates[i]; - } - } - - BUG(); - return NULL; -} - -/* - * Unregister hstate attributes from a single node device. - * No-op if no hstate attributes attached. - */ -void hugetlb_unregister_node(struct node *node) -{ - struct hstate *h; - struct node_hstate *nhs = &node_hstates[node->dev.id]; - - if (!nhs->hugepages_kobj) - return; /* no hstate attributes */ - - for_each_hstate(h) { - int idx = hstate_index(h); - struct kobject *hstate_kobj = nhs->hstate_kobjs[idx]; - - if (!hstate_kobj) - continue; - if (h->demote_order) - sysfs_remove_group(hstate_kobj, &hstate_demote_attr_group); - sysfs_remove_group(hstate_kobj, &per_node_hstate_attr_group); - kobject_put(hstate_kobj); - nhs->hstate_kobjs[idx] = NULL; - } - - kobject_put(nhs->hugepages_kobj); - nhs->hugepages_kobj = NULL; -} - - -/* - * Register hstate attributes for a single node device. - * No-op if attributes already registered. - */ -void hugetlb_register_node(struct node *node) -{ - struct hstate *h; - struct node_hstate *nhs = &node_hstates[node->dev.id]; - int err; - - if (!hugetlb_sysfs_initialized) - return; - - if (nhs->hugepages_kobj) - return; /* already allocated */ - - nhs->hugepages_kobj = kobject_create_and_add("hugepages", - &node->dev.kobj); - if (!nhs->hugepages_kobj) - return; - - for_each_hstate(h) { - err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj, - nhs->hstate_kobjs, - &per_node_hstate_attr_group); - if (err) { - pr_err("HugeTLB: Unable to add hstate %s for node %d\n", - h->name, node->dev.id); - hugetlb_unregister_node(node); - break; - } - } -} - -/* - * hugetlb init time: register hstate attributes for all registered node - * devices of nodes that have memory. All on-line nodes should have - * registered their associated device by this time. - */ -static void __init hugetlb_register_all_nodes(void) -{ - int nid; - - for_each_online_node(nid) - hugetlb_register_node(node_devices[nid]); -} -#else /* !CONFIG_NUMA */ - -static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp) -{ - BUG(); - if (nidp) - *nidp = -1; - return NULL; -} - -static void hugetlb_register_all_nodes(void) { } - -#endif - -static void __init hugetlb_sysfs_init(void) -{ - struct hstate *h; - int err; - - hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj); - if (!hugepages_kobj) - return; - - for_each_hstate(h) { - err = hugetlb_sysfs_add_hstate(h, hugepages_kobj, - hstate_kobjs, &hstate_attr_group); - if (err) - pr_err("HugeTLB: Unable to add hstate %s\n", h->name); - } - -#ifdef CONFIG_NUMA - hugetlb_sysfs_initialized = true; -#endif - hugetlb_register_all_nodes(); -} - #ifdef CONFIG_SYSCTL static void hugetlb_sysctl_init(void); #else diff --git a/mm/hugetlb_internal.h b/mm/hugetlb_internal.h new file mode 100644 index 000000000000..5ea372500de7 --- /dev/null +++ b/mm/hugetlb_internal.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Internal HugeTLB definitions. + * (C) Nadia Yvette Chambers, April 2004 + */ + +#ifndef _LINUX_HUGETLB_INTERNAL_H +#define _LINUX_HUGETLB_INTERNAL_H + +#include +#include + +/* + * Check if the hstate represents gigantic pages but gigantic page + * runtime support is not available. This is a common condition used to + * skip operations that cannot be performed on gigantic pages when runtime + * support is disabled. + */ +static inline bool hstate_is_gigantic_no_runtime(struct hstate *h) +{ + return hstate_is_gigantic(h) && !gigantic_page_runtime_supported(); +} + +/* + * common helper functions for hstate_next_node_to_{alloc|free}. + * We may have allocated or freed a huge page based on a different + * nodes_allowed previously, so h->next_node_to_{alloc|free} might + * be outside of *nodes_allowed. Ensure that we use an allowed + * node for alloc or free. + */ +static inline int next_node_allowed(int nid, nodemask_t *nodes_allowed) +{ + nid = next_node_in(nid, *nodes_allowed); + VM_BUG_ON(nid >= MAX_NUMNODES); + + return nid; +} + +static inline int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed) +{ + if (!node_isset(nid, *nodes_allowed)) + nid = next_node_allowed(nid, nodes_allowed); + return nid; +} + +/* + * returns the previously saved node ["this node"] from which to + * allocate a persistent huge page for the pool and advance the + * next node from which to allocate, handling wrap at end of node + * mask. + */ +static inline int hstate_next_node_to_alloc(int *next_node, + nodemask_t *nodes_allowed) +{ + int nid; + + VM_BUG_ON(!nodes_allowed); + + nid = get_valid_node_allowed(*next_node, nodes_allowed); + *next_node = next_node_allowed(nid, nodes_allowed); + + return nid; +} + +/* + * helper for remove_pool_hugetlb_folio() - return the previously saved + * node ["this node"] from which to free a huge page. Advance the + * next node id whether or not we find a free huge page to free so + * that the next attempt to free addresses the next node. + */ +static inline int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) +{ + int nid; + + VM_BUG_ON(!nodes_allowed); + + nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed); + h->next_nid_to_free = next_node_allowed(nid, nodes_allowed); + + return nid; +} + +#define for_each_node_mask_to_alloc(next_node, nr_nodes, node, mask) \ + for (nr_nodes = nodes_weight(*mask); \ + nr_nodes > 0 && \ + ((node = hstate_next_node_to_alloc(next_node, mask)) || 1); \ + nr_nodes--) + +#define for_each_node_mask_to_free(hs, nr_nodes, node, mask) \ + for (nr_nodes = nodes_weight(*mask); \ + nr_nodes > 0 && \ + ((node = hstate_next_node_to_free(hs, mask)) || 1); \ + nr_nodes--) + +extern void remove_hugetlb_folio(struct hstate *h, struct folio *folio, + bool adjust_surplus); +extern void add_hugetlb_folio(struct hstate *h, struct folio *folio, + bool adjust_surplus); +extern void init_new_hugetlb_folio(struct folio *folio); +extern void prep_and_add_allocated_folios(struct hstate *h, + struct list_head *folio_list); +extern long demote_pool_huge_page(struct hstate *src, + nodemask_t *nodes_allowed, + unsigned long nr_to_demote); +extern ssize_t __nr_hugepages_store_common(bool obey_mempolicy, + struct hstate *h, int nid, + unsigned long count, size_t len); + +extern void hugetlb_sysfs_init(void) __init; + +#endif /* _LINUX_HUGETLB_INTERNAL_H */ diff --git a/mm/hugetlb_sysfs.c b/mm/hugetlb_sysfs.c new file mode 100644 index 000000000000..79ece91406bf --- /dev/null +++ b/mm/hugetlb_sysfs.c @@ -0,0 +1,502 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HugeTLB sysfs interfaces. + * (C) Nadia Yvette Chambers, April 2004 + */ + +#include +#include +#include + +#include "hugetlb_vmemmap.h" +#include "hugetlb_internal.h" + +#define HSTATE_ATTR_RO(_name) \ + static struct kobj_attribute _name##_attr = __ATTR_RO(_name) + +#define HSTATE_ATTR_WO(_name) \ + static struct kobj_attribute _name##_attr = __ATTR_WO(_name) + +#define HSTATE_ATTR(_name) \ + static struct kobj_attribute _name##_attr = __ATTR_RW(_name) + +static struct kobject *hugepages_kobj; +static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; + +static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp); + +static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp) +{ + int i; + + for (i = 0; i < HUGE_MAX_HSTATE; i++) + if (hstate_kobjs[i] == kobj) { + if (nidp) + *nidp = NUMA_NO_NODE; + return &hstates[i]; + } + + return kobj_to_node_hstate(kobj, nidp); +} + +static ssize_t nr_hugepages_show_common(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h; + unsigned long nr_huge_pages; + int nid; + + h = kobj_to_hstate(kobj, &nid); + if (nid == NUMA_NO_NODE) + nr_huge_pages = h->nr_huge_pages; + else + nr_huge_pages = h->nr_huge_pages_node[nid]; + + return sysfs_emit(buf, "%lu\n", nr_huge_pages); +} + +static ssize_t nr_hugepages_store_common(bool obey_mempolicy, + struct kobject *kobj, const char *buf, + size_t len) +{ + struct hstate *h; + unsigned long count; + int nid; + int err; + + err = kstrtoul(buf, 10, &count); + if (err) + return err; + + h = kobj_to_hstate(kobj, &nid); + return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len); +} + +static ssize_t nr_hugepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return nr_hugepages_show_common(kobj, attr, buf); +} + +static ssize_t nr_hugepages_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t len) +{ + return nr_hugepages_store_common(false, kobj, buf, len); +} +HSTATE_ATTR(nr_hugepages); + +#ifdef CONFIG_NUMA + +/* + * hstate attribute for optionally mempolicy-based constraint on persistent + * huge page alloc/free. + */ +static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return nr_hugepages_show_common(kobj, attr, buf); +} + +static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t len) +{ + return nr_hugepages_store_common(true, kobj, buf, len); +} +HSTATE_ATTR(nr_hugepages_mempolicy); +#endif + + +static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h = kobj_to_hstate(kobj, NULL); + return sysfs_emit(buf, "%lu\n", h->nr_overcommit_huge_pages); +} + +static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int err; + unsigned long input; + struct hstate *h = kobj_to_hstate(kobj, NULL); + + if (hstate_is_gigantic_no_runtime(h)) + return -EINVAL; + + err = kstrtoul(buf, 10, &input); + if (err) + return err; + + spin_lock_irq(&hugetlb_lock); + h->nr_overcommit_huge_pages = input; + spin_unlock_irq(&hugetlb_lock); + + return count; +} +HSTATE_ATTR(nr_overcommit_hugepages); + +static ssize_t free_hugepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h; + unsigned long free_huge_pages; + int nid; + + h = kobj_to_hstate(kobj, &nid); + if (nid == NUMA_NO_NODE) + free_huge_pages = h->free_huge_pages; + else + free_huge_pages = h->free_huge_pages_node[nid]; + + return sysfs_emit(buf, "%lu\n", free_huge_pages); +} +HSTATE_ATTR_RO(free_hugepages); + +static ssize_t resv_hugepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h = kobj_to_hstate(kobj, NULL); + return sysfs_emit(buf, "%lu\n", h->resv_huge_pages); +} +HSTATE_ATTR_RO(resv_hugepages); + +static ssize_t surplus_hugepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h; + unsigned long surplus_huge_pages; + int nid; + + h = kobj_to_hstate(kobj, &nid); + if (nid == NUMA_NO_NODE) + surplus_huge_pages = h->surplus_huge_pages; + else + surplus_huge_pages = h->surplus_huge_pages_node[nid]; + + return sysfs_emit(buf, "%lu\n", surplus_huge_pages); +} +HSTATE_ATTR_RO(surplus_hugepages); + +static ssize_t demote_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t len) +{ + unsigned long nr_demote; + unsigned long nr_available; + nodemask_t nodes_allowed, *n_mask; + struct hstate *h; + int err; + int nid; + + err = kstrtoul(buf, 10, &nr_demote); + if (err) + return err; + h = kobj_to_hstate(kobj, &nid); + + if (nid != NUMA_NO_NODE) { + init_nodemask_of_node(&nodes_allowed, nid); + n_mask = &nodes_allowed; + } else { + n_mask = &node_states[N_MEMORY]; + } + + /* Synchronize with other sysfs operations modifying huge pages */ + mutex_lock(&h->resize_lock); + spin_lock_irq(&hugetlb_lock); + + while (nr_demote) { + long rc; + + /* + * Check for available pages to demote each time thorough the + * loop as demote_pool_huge_page will drop hugetlb_lock. + */ + if (nid != NUMA_NO_NODE) + nr_available = h->free_huge_pages_node[nid]; + else + nr_available = h->free_huge_pages; + nr_available -= h->resv_huge_pages; + if (!nr_available) + break; + + rc = demote_pool_huge_page(h, n_mask, nr_demote); + if (rc < 0) { + err = rc; + break; + } + + nr_demote -= rc; + } + + spin_unlock_irq(&hugetlb_lock); + mutex_unlock(&h->resize_lock); + + if (err) + return err; + return len; +} +HSTATE_ATTR_WO(demote); + +static ssize_t demote_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h = kobj_to_hstate(kobj, NULL); + unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K; + + return sysfs_emit(buf, "%lukB\n", demote_size); +} + +static ssize_t demote_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct hstate *h, *demote_hstate; + unsigned long demote_size; + unsigned int demote_order; + + demote_size = (unsigned long)memparse(buf, NULL); + + demote_hstate = size_to_hstate(demote_size); + if (!demote_hstate) + return -EINVAL; + demote_order = demote_hstate->order; + if (demote_order < HUGETLB_PAGE_ORDER) + return -EINVAL; + + /* demote order must be smaller than hstate order */ + h = kobj_to_hstate(kobj, NULL); + if (demote_order >= h->order) + return -EINVAL; + + /* resize_lock synchronizes access to demote size and writes */ + mutex_lock(&h->resize_lock); + h->demote_order = demote_order; + mutex_unlock(&h->resize_lock); + + return count; +} +HSTATE_ATTR(demote_size); + +static struct attribute *hstate_attrs[] = { + &nr_hugepages_attr.attr, + &nr_overcommit_hugepages_attr.attr, + &free_hugepages_attr.attr, + &resv_hugepages_attr.attr, + &surplus_hugepages_attr.attr, +#ifdef CONFIG_NUMA + &nr_hugepages_mempolicy_attr.attr, +#endif + NULL, +}; + +static const struct attribute_group hstate_attr_group = { + .attrs = hstate_attrs, +}; + +static struct attribute *hstate_demote_attrs[] = { + &demote_size_attr.attr, + &demote_attr.attr, + NULL, +}; + +static const struct attribute_group hstate_demote_attr_group = { + .attrs = hstate_demote_attrs, +}; + +static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, + struct kobject **hstate_kobjs, + const struct attribute_group *hstate_attr_group) +{ + int retval; + int hi = hstate_index(h); + + hstate_kobjs[hi] = kobject_create_and_add(h->name, parent); + if (!hstate_kobjs[hi]) + return -ENOMEM; + + retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group); + if (retval) { + kobject_put(hstate_kobjs[hi]); + hstate_kobjs[hi] = NULL; + return retval; + } + + if (h->demote_order) { + retval = sysfs_create_group(hstate_kobjs[hi], + &hstate_demote_attr_group); + if (retval) { + pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name); + sysfs_remove_group(hstate_kobjs[hi], hstate_attr_group); + kobject_put(hstate_kobjs[hi]); + hstate_kobjs[hi] = NULL; + return retval; + } + } + + return 0; +} + +#ifdef CONFIG_NUMA +static bool hugetlb_sysfs_initialized __ro_after_init; + +/* + * node_hstate/s - associate per node hstate attributes, via their kobjects, + * with node devices in node_devices[] using a parallel array. The array + * index of a node device or _hstate == node id. + * This is here to avoid any static dependency of the node device driver, in + * the base kernel, on the hugetlb module. + */ +struct node_hstate { + struct kobject *hugepages_kobj; + struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; +}; +static struct node_hstate node_hstates[MAX_NUMNODES]; + +/* + * A subset of global hstate attributes for node devices + */ +static struct attribute *per_node_hstate_attrs[] = { + &nr_hugepages_attr.attr, + &free_hugepages_attr.attr, + &surplus_hugepages_attr.attr, + NULL, +}; + +static const struct attribute_group per_node_hstate_attr_group = { + .attrs = per_node_hstate_attrs, +}; + +/* + * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj. + * Returns node id via non-NULL nidp. + */ +static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp) +{ + int nid; + + for (nid = 0; nid < nr_node_ids; nid++) { + struct node_hstate *nhs = &node_hstates[nid]; + int i; + for (i = 0; i < HUGE_MAX_HSTATE; i++) + if (nhs->hstate_kobjs[i] == kobj) { + if (nidp) + *nidp = nid; + return &hstates[i]; + } + } + + BUG(); + return NULL; +} + +/* + * Unregister hstate attributes from a single node device. + * No-op if no hstate attributes attached. + */ +void hugetlb_unregister_node(struct node *node) +{ + struct hstate *h; + struct node_hstate *nhs = &node_hstates[node->dev.id]; + + if (!nhs->hugepages_kobj) + return; /* no hstate attributes */ + + for_each_hstate(h) { + int idx = hstate_index(h); + struct kobject *hstate_kobj = nhs->hstate_kobjs[idx]; + + if (!hstate_kobj) + continue; + if (h->demote_order) + sysfs_remove_group(hstate_kobj, &hstate_demote_attr_group); + sysfs_remove_group(hstate_kobj, &per_node_hstate_attr_group); + kobject_put(hstate_kobj); + nhs->hstate_kobjs[idx] = NULL; + } + + kobject_put(nhs->hugepages_kobj); + nhs->hugepages_kobj = NULL; +} + + +/* + * Register hstate attributes for a single node device. + * No-op if attributes already registered. + */ +void hugetlb_register_node(struct node *node) +{ + struct hstate *h; + struct node_hstate *nhs = &node_hstates[node->dev.id]; + int err; + + if (!hugetlb_sysfs_initialized) + return; + + if (nhs->hugepages_kobj) + return; /* already allocated */ + + nhs->hugepages_kobj = kobject_create_and_add("hugepages", + &node->dev.kobj); + if (!nhs->hugepages_kobj) + return; + + for_each_hstate(h) { + err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj, + nhs->hstate_kobjs, + &per_node_hstate_attr_group); + if (err) { + pr_err("HugeTLB: Unable to add hstate %s for node %d\n", + h->name, node->dev.id); + hugetlb_unregister_node(node); + break; + } + } +} + +/* + * hugetlb init time: register hstate attributes for all registered node + * devices of nodes that have memory. All on-line nodes should have + * registered their associated device by this time. + */ +static void __init hugetlb_register_all_nodes(void) +{ + int nid; + + for_each_online_node(nid) + hugetlb_register_node(node_devices[nid]); +} +#else /* !CONFIG_NUMA */ + +static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp) +{ + BUG(); + if (nidp) + *nidp = -1; + return NULL; +} + +static void hugetlb_register_all_nodes(void) { } + +#endif + +void __init hugetlb_sysfs_init(void) +{ + struct hstate *h; + int err; + + hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj); + if (!hugepages_kobj) + return; + + for_each_hstate(h) { + err = hugetlb_sysfs_add_hstate(h, hugepages_kobj, + hstate_kobjs, &hstate_attr_group); + if (err) + pr_err("HugeTLB: Unable to add hstate %s\n", h->name); + } + +#ifdef CONFIG_NUMA + hugetlb_sysfs_initialized = true; +#endif + hugetlb_register_all_nodes(); +}