From aefd967dab6469f5b827b59e50016a760dcc1fbc Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe
Date: Thu, 23 Oct 2025 15:22:31 -0300
Subject: [PATCH] iommupt: Use the incoherent start/stop functions for
 PT_FEAT_DMA_INCOHERENT

This is the first step to supporting an incoherent walker: start and stop
the incoherence around the allocation and freeing of the page table memory.
The iommu_pages API maps this to dma_map/unmap_single() or to arch cache
flushing calls.

Reviewed-by: Lu Baolu
Reviewed-by: Kevin Tian
Signed-off-by: Jason Gunthorpe
Signed-off-by: Joerg Roedel
---
 drivers/iommu/generic_pt/iommu_pt.h    | 89 ++++++++++++++++++++------
 drivers/iommu/generic_pt/kunit_iommu.h |  1 +
 drivers/iommu/generic_pt/pt_defs.h     |  5 +-
 include/linux/generic_pt/common.h      |  6 ++
 include/linux/generic_pt/iommu.h       |  7 ++
 5 files changed, 88 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 142001f5aa83..2cad07da995a 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -24,6 +24,10 @@ static void gather_range_pages(struct iommu_iotlb_gather *iotlb_gather,
 {
 	struct pt_common *common = common_from_iommu(iommu_table);
 
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
+		iommu_pages_stop_incoherent_list(free_list,
+						 iommu_table->iommu_device);
+
 	if (pt_feature(common, PT_FEAT_FLUSH_RANGE_NO_GAPS) &&
 	    iommu_iotlb_gather_is_disjoint(iotlb_gather, iova, len)) {
 		iommu_iotlb_sync(&iommu_table->domain, iotlb_gather);
@@ -329,35 +333,55 @@ static int __collect_tables(struct pt_range *range, void *arg,
 	return 0;
 }
 
-static inline struct pt_table_p *table_alloc_top(struct pt_common *common,
-						 uintptr_t top_of_table,
-						 gfp_t gfp)
+enum alloc_mode { ALLOC_NORMAL, ALLOC_DEFER_COHERENT_FLUSH };
+
+/* Allocate a table; the empty table will be ready to be installed. */
+static inline struct pt_table_p *_table_alloc(struct pt_common *common,
+					      size_t lg2sz, gfp_t gfp,
+					      enum alloc_mode mode)
 {
 	struct pt_iommu *iommu_table = iommu_from_common(common);
+	struct pt_table_p *table_mem;
 
+	table_mem = iommu_alloc_pages_node_sz(iommu_table->nid, gfp,
+					      log2_to_int(lg2sz));
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT) &&
+	    mode == ALLOC_NORMAL) {
+		int ret = iommu_pages_start_incoherent(
+			table_mem, iommu_table->iommu_device);
+		if (ret) {
+			iommu_free_pages(table_mem);
+			return ERR_PTR(ret);
+		}
+	}
+	return table_mem;
+}
+
+static inline struct pt_table_p *table_alloc_top(struct pt_common *common,
+						 uintptr_t top_of_table,
+						 gfp_t gfp,
+						 enum alloc_mode mode)
+{
 	/*
 	 * Top doesn't need the free list or otherwise, so it technically
 	 * doesn't need to use iommu pages. Use the API anyhow as the top is
 	 * usually not smaller than PAGE_SIZE to keep things simple.
 	 */
-	return iommu_alloc_pages_node_sz(
-		iommu_table->nid, gfp,
-		log2_to_int(pt_top_memsize_lg2(common, top_of_table)));
+	return _table_alloc(common, pt_top_memsize_lg2(common, top_of_table),
+			    gfp, mode);
 }
 
 /* Allocate an interior table */
 static inline struct pt_table_p *table_alloc(const struct pt_state *parent_pts,
-					     gfp_t gfp)
+					     gfp_t gfp, enum alloc_mode mode)
 {
-	struct pt_iommu *iommu_table =
-		iommu_from_common(parent_pts->range->common);
 	struct pt_state child_pts =
 		pt_init(parent_pts->range, parent_pts->level - 1, NULL);
 
-	return iommu_alloc_pages_node_sz(
-		iommu_table->nid, gfp,
-		log2_to_int(pt_num_items_lg2(&child_pts) +
-			    ilog2(PT_ITEM_WORD_SIZE)));
+	return _table_alloc(parent_pts->range->common,
+			    pt_num_items_lg2(&child_pts) +
+				    ilog2(PT_ITEM_WORD_SIZE),
+			    gfp, mode);
 }
 
 static inline int pt_iommu_new_table(struct pt_state *pts,
@@ -370,13 +394,15 @@ static inline int pt_iommu_new_table(struct pt_state *pts,
 	if (PT_WARN_ON(!pt_can_have_table(pts)))
 		return -ENXIO;
 
-	table_mem = table_alloc(pts, attrs->gfp);
+	table_mem = table_alloc(pts, attrs->gfp, ALLOC_NORMAL);
 	if (IS_ERR(table_mem))
 		return PTR_ERR(table_mem);
 	phys = virt_to_phys(table_mem);
 
 	if (!pt_install_table(pts, phys, attrs)) {
-		iommu_free_pages(table_mem);
+		iommu_pages_free_incoherent(
+			table_mem,
+			iommu_from_common(pts->range->common)->iommu_device);
 		return -EAGAIN;
 	}
 
@@ -389,7 +415,9 @@ static inline int pt_iommu_new_table(struct pt_state *pts,
 		pt_load_single_entry(pts);
 		if (PT_WARN_ON(pt_table_pa(pts) != phys)) {
 			pt_clear_entries(pts, ilog2(1));
-			iommu_free_pages(table_mem);
+			iommu_pages_free_incoherent(
+				table_mem, iommu_from_common(pts->range->common)
+						   ->iommu_device);
 			return -EINVAL;
 		}
 	}
@@ -615,8 +643,9 @@ static int increase_top(struct pt_iommu *iommu_table, struct pt_range *range,
 	}
 	new_level = pts.level;
 
-	table_mem = table_alloc_top(
-		common, _pt_top_set(NULL, pts.level), map->attrs.gfp);
+	table_mem =
+		table_alloc_top(common, _pt_top_set(NULL, pts.level),
+				map->attrs.gfp, ALLOC_DEFER_COHERENT_FLUSH);
 	if (IS_ERR(table_mem))
 		return PTR_ERR(table_mem);
 	iommu_pages_list_add(&free_list, table_mem);
@@ -633,6 +662,16 @@ static int increase_top(struct pt_iommu *iommu_table, struct pt_range *range,
 		new_top_of_table = _pt_top_set(pts.table, pts.level);
 	}
 
+	/*
+	 * Avoid double flushing; flush it once after all pt_install_table()
+	 */
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT)) {
+		ret = iommu_pages_start_incoherent_list(
+			&free_list, iommu_table->iommu_device);
+		if (ret)
+			goto err_free;
+	}
+
 	/*
 	 * top_of_table is write locked by the spinlock, but readers can use
 	 * READ_ONCE() to get the value. Since we encode both the level and the
@@ -665,6 +704,9 @@ static int increase_top(struct pt_iommu *iommu_table, struct pt_range *range,
 	return 0;
 
 err_free:
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
+		iommu_pages_stop_incoherent_list(&free_list,
+						 iommu_table->iommu_device);
 	iommu_put_pages_list(&free_list);
 	return ret;
 }
@@ -988,6 +1030,9 @@ static void NS(deinit)(struct pt_iommu *iommu_table)
 	 * The driver has to already have fenced the HW access to the page table
 	 * and invalidated any caching referring to this memory.
 	 */
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
+		iommu_pages_stop_incoherent_list(&collect.free_list,
+						 iommu_table->iommu_device);
 	iommu_put_pages_list(&collect.free_list);
 }
 
@@ -1078,6 +1123,7 @@ static void pt_iommu_zero(struct pt_iommu_table *fmt_table)
 	memset_after(fmt_table, 0, iommu.domain);
 
 	/* The caller can initialize some of these values */
+	iommu_table->iommu_device = cfg.iommu_device;
 	iommu_table->driver_ops = cfg.driver_ops;
 	iommu_table->nid = cfg.nid;
 }
@@ -1123,11 +1169,16 @@ int pt_iommu_init(struct pt_iommu_table *fmt_table,
 		     pt_feature(common, PT_FEAT_DYNAMIC_TOP)))
 		return -EINVAL;
 
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT) &&
+	    WARN_ON(!iommu_table->iommu_device))
+		return -EINVAL;
+
 	ret = pt_iommu_init_domain(iommu_table, &iommu_table->domain);
 	if (ret)
 		return ret;
 
-	table_mem = table_alloc_top(common, common->top_of_table, gfp);
+	table_mem = table_alloc_top(common, common->top_of_table, gfp,
+				    ALLOC_NORMAL);
 	if (IS_ERR(table_mem))
 		return PTR_ERR(table_mem);
 	pt_top_set(common, table_mem, pt_top_get_level(common));
diff --git a/drivers/iommu/generic_pt/kunit_iommu.h b/drivers/iommu/generic_pt/kunit_iommu.h
index d541235632aa..5d4f269627d5 100644
--- a/drivers/iommu/generic_pt/kunit_iommu.h
+++ b/drivers/iommu/generic_pt/kunit_iommu.h
@@ -139,6 +139,7 @@ static int pt_kunit_priv_init(struct kunit *test, struct kunit_iommu_priv *priv)
 	priv->fmt_table.iommu.nid = NUMA_NO_NODE;
 	priv->fmt_table.iommu.driver_ops = &pt_kunit_driver_ops;
+	priv->fmt_table.iommu.iommu_device = priv->dummy_dev;
 	priv->domain.ops = &kunit_pt_ops;
 
 	ret = pt_iommu_init(&priv->fmt_table, &priv->cfg, GFP_KERNEL);
 	if (ret) {
diff --git a/drivers/iommu/generic_pt/pt_defs.h b/drivers/iommu/generic_pt/pt_defs.h
index 819057de50d8..c25544d72f97 100644
--- a/drivers/iommu/generic_pt/pt_defs.h
+++ b/drivers/iommu/generic_pt/pt_defs.h
@@ -48,13 +48,16 @@ enum {
 /*
  * When in debug mode we compile all formats with all features. This allows the
  * kunit to test the full matrix. SIGN_EXTEND can't co-exist with DYNAMIC_TOP or
- * FULL_VA.
+ * FULL_VA. DMA_INCOHERENT requires a SW bit that not all formats have.
  */
 #if IS_ENABLED(CONFIG_DEBUG_GENERIC_PT)
 enum {
 	PT_ORIG_SUPPORTED_FEATURES = PT_SUPPORTED_FEATURES,
 	PT_DEBUG_SUPPORTED_FEATURES =
 		UINT_MAX &
+		~((PT_ORIG_SUPPORTED_FEATURES & BIT(PT_FEAT_DMA_INCOHERENT) ?
+			   0 :
+			   BIT(PT_FEAT_DMA_INCOHERENT))) &
 		~((PT_ORIG_SUPPORTED_FEATURES & BIT(PT_FEAT_SIGN_EXTEND)) ?
 			  BIT(PT_FEAT_DYNAMIC_TOP) | BIT(PT_FEAT_FULL_VA) :
 			  BIT(PT_FEAT_SIGN_EXTEND)),
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
index 96f8a6a7d60e..883069e32952 100644
--- a/include/linux/generic_pt/common.h
+++ b/include/linux/generic_pt/common.h
@@ -85,6 +85,12 @@ enum {
 	 * position.
 	 */
 enum pt_features {
+	/**
+	 * @PT_FEAT_DMA_INCOHERENT: Cache flush page table memory before
+	 * assuming the HW can read it. Otherwise an SMP release is sufficient
+	 * for HW to read it.
+	 */
+	PT_FEAT_DMA_INCOHERENT,
 	/**
 	 * @PT_FEAT_FULL_VA: The table can span the full VA range from 0 to
 	 * PT_VADDR_MAX.
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index fde7ccf007c5..21132e342a79 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -57,6 +57,13 @@ struct pt_iommu {
 	 * table walkers.
 	 */
 	int nid;
+
+	/**
+	 * @iommu_device: Device pointer used for any DMA cache flushing when
+	 * PT_FEAT_DMA_INCOHERENT. This is the iommu device that created the
+	 * page table, which must have DMA ops that perform cache flushing.
+	 */
+	struct device *iommu_device;
 };
 
 /**
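
The sketch below (not part of the patch) illustrates the pairing the hunks above establish for the PT_FEAT_DMA_INCOHERENT path: table memory is flushed/mapped for the IOMMU device right after allocation, and unmapped again as part of freeing it. It assumes the iommu_pages_*() and iommu_alloc_pages_node_sz() usage visible in the diff; the example_*() helper names are hypothetical and only show how the calls are expected to pair up in a format driver.

/*
 * Illustrative sketch only, not part of the patch. Assumes the call usage
 * shown in the hunks above; example_new_table()/example_free_table() are
 * hypothetical helpers covering the PT_FEAT_DMA_INCOHERENT-enabled path.
 */
static struct pt_table_p *example_new_table(struct pt_iommu *iommu_table,
					    size_t lg2sz, gfp_t gfp)
{
	struct pt_table_p *table_mem;
	int ret;

	/* Plain allocation of the zeroed table memory, as before the patch. */
	table_mem = iommu_alloc_pages_node_sz(iommu_table->nid, gfp,
					      log2_to_int(lg2sz));
	if (!table_mem)
		return ERR_PTR(-ENOMEM);

	/*
	 * Start the incoherence: flush/map the memory for iommu_device so the
	 * HW walker can observe the zeroed entries before the table is
	 * installed into its parent.
	 */
	ret = iommu_pages_start_incoherent(table_mem,
					   iommu_table->iommu_device);
	if (ret) {
		iommu_free_pages(table_mem);
		return ERR_PTR(ret);
	}
	return table_mem;
}

static void example_free_table(struct pt_iommu *iommu_table,
			       struct pt_table_p *table_mem)
{
	/*
	 * Stop the incoherence and free in one step, mirroring the error
	 * paths in pt_iommu_new_table() above.
	 */
	iommu_pages_free_incoherent(table_mem, iommu_table->iommu_device);
}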