IOMMU Updates for Linux v6.19
Including:
- Introduction of the generic IO page-table framework with support for
Intel and AMD IOMMU formats from Jason. This has good potential for
unifying more IO page-table implementations and making future
enhancements easier. But it also needed quite a few fixes during
development. All known issues have been fixed, but my feeling is that
there is a higher potential than usual that more might be needed.
- Intel VT-d updates:
- Use the right invalidation hint in qi_desc_iotlb().
- Reduce the scope of INTEL_IOMMU_FLOPPY_WA.
- ARM-SMMU updates:
- Qualcomm device-tree binding updates for Kaanapali and Glymur SoCs
and a new clock for the TBU.
- Fix error handling if level 1 CD table allocation fails.
- Permit more than the architectural maximum number of SMRs for funky
Qualcomm mis-implementations of SMMUv2.
- Mediatek driver:
- MT8189 iommu support.
- Move ARM IO-pgtable selftests to kunit.
- Device leak fixes for a couple of drivers.
- Random smaller fixes and improvements.
-----BEGIN PGP SIGNATURE-----
iQIzBAABCgAdFiEEr9jSbILcajRFYWYyK/BELZcBGuMFAmktjWgACgkQK/BELZcB
GuMQgA//YljqMVbMmYpFQF/9nXsyTvkpzaaVqj3CscjfnLJQ7YNIrWHLMw4xcZP7
c2zsSDMPc5LAe2nkyNPvMeFufsbDOYE1CcbqFfZhNqwKGWIVs7J1j73aqJXKfXB4
RaVv5GA1NFeeIRRKPx/ZpD9W1WiR9PiUQXBxNTAJLzbBNhcTJzo+3YuSpJ6ckOak
aG67Aox6Dwq/u0gHy8gWnuA2XL6Eit+nQbod7TfchHoRu+TUdbv8qWL+sUChj+u/
IlyBt1YL/do3bJC0G1G2E81J1KGPU/OZRfB34STQKlopEdXX17ax3b2X0bt3Hz/h
9Yk3BLDtGMBQ0aVZzAZcOLLlBlEpPiMKBVuJQj29kK9KJSYfmr2iolOK0cGyt+kv
DfQ8+nv6HRFMbwjetfmhGYf6WemPcggvX44Hm/rgR2qbN3P+Q8/klyyH8MLuQeqO
ttoQIwDd9DYKJelmWzbLgpb2vGE3O0EAFhiTcCKOk643PaudfengWYKZpJVIIqtF
nEUEpk17HlpgFkYrtmIE7CMONqUGaQYO84R3j7DXcYXYAvqQJkhR3uJejlWQeh8x
uMc9y04jpg3p5vC5c7LfkQ3at3p/jf7jzz4GuNoZP5bdVIUkwXXolir0ct3/oby3
/bXXNA1pSaRuUADm7pYBBhKYAFKC7vCSa8LVaDR3CB95aNZnvS0=
=KG7j
-----END PGP SIGNATURE-----
Merge tag 'iommu-updates-v6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux
Pull iommu updates from Joerg Roedel:
- Introduction of the generic IO page-table framework with support for
Intel and AMD IOMMU formats from Jason.
This has good potential for unifying more IO page-table
implementations and making future enhancements easier. But it
also needed quite a few fixes during development. All known issues
have been fixed, but my feeling is that there is a higher potential
than usual that more might be needed.
- Intel VT-d updates:
- Use the right invalidation hint in qi_desc_iotlb()
- Reduce the scope of INTEL_IOMMU_FLOPPY_WA
- ARM-SMMU updates:
- Qualcomm device-tree binding updates for Kaanapali and Glymur SoCs
and a new clock for the TBU.
- Fix error handling if level 1 CD table allocation fails.
- Permit more than the architectural maximum number of SMRs for
funky Qualcomm mis-implementations of SMMUv2.
- Mediatek driver:
- MT8189 iommu support
- Move ARM IO-pgtable selftests to kunit
- Device leak fixes for a couple of drivers
- Random smaller fixes and improvements
* tag 'iommu-updates-v6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux: (81 commits)
iommupt/vtd: Support mgaw's less than a 4 level walk for first stage
iommupt/vtd: Allow VT-d to have a larger table top than the vasz requires
powerpc/pseries/svm: Make mem_encrypt.h self contained
genpt: Make GENERIC_PT invisible
iommupt: Avoid a compiler bug with sw_bit
iommu/arm-smmu-qcom: Enable use of all SMR groups when running bare-metal
iommupt: Fix unlikely flows in increase_top()
iommu/amd: Propagate the error code returned by __modify_irte_ga() in modify_irte_ga()
MAINTAINERS: Update my email address
iommu/arm-smmu-v3: Fix error check in arm_smmu_alloc_cd_tables
dt-bindings: iommu: qcom_iommu: Allow 'tbu' clock
iommu/vt-d: Restore previous domain::aperture_end calculation
iommu/vt-d: Fix unused invalidation hint in qi_desc_iotlb
iommu/vt-d: Set INTEL_IOMMU_FLOPPY_WA depend on BLK_DEV_FD
iommu/tegra: fix device leak on probe_device()
iommu/sun50i: fix device leak on of_xlate()
iommu/omap: simplify probe_device() error handling
iommu/omap: fix device leaks on probe_device()
iommu/mediatek-v1: add missing larb count sanity check
iommu/mediatek-v1: fix device leaks on probe()
...
commit ce5cfb0fa2
.clang-format (@@ -415,6 +415,7 @@ ForEachMacros): add 'for_each_pt_level_entry'
to the ForEachMacros list.

.mailmap (@@ -345,7 +345,8 @@): map Jean-Philippe Brucker's old arm.com and
linaro.org addresses to his new address, <jpb@kernel.org>.

Documentation/devicetree/bindings/iommu/arm,smmu.yaml (@@ -35,6 +35,8 @@):
add the qcom,glymur-smmu-500 and qcom,kaanapali-smmu-500 compatibles to the
"qcom,smmu-500" + "arm,mmu-500" enum.

Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml (several hunks):
add the mediatek,mt8189-iommu-apu, mediatek,mt8189-iommu-infra and
mediatek,mt8189-iommu-mm compatibles (generation two), document
dt-binding/memory/mediatek,mt8189-memory-port.h for the mt8189 memory ports,
add mt8189-iommu-mm to the existing mm-IOMMU constraint lists and
mt8189-iommu-apu/mt8189-iommu-infra to the infra-IOMMU enum.

Documentation/devicetree/bindings/iommu/qcom,iommu.yaml (@@ -32,14 +32,18 @@):
allow an optional third clock for Translation Buffer Unit access with
clock-name "tbu"; clocks and clock-names use minItems: 2 so the existing
two-clock users stay valid.
Documentation/driver-api/generic_pt.rst (new file, 137 lines):

.. SPDX-License-Identifier: GPL-2.0

========================
Generic Radix Page Table
========================

.. kernel-doc:: include/linux/generic_pt/common.h
   :doc: Generic Radix Page Table

.. kernel-doc:: drivers/iommu/generic_pt/pt_defs.h
   :doc: Generic Page Table Language

Usage
=====

Generic PT is structured as a multi-compilation system. Since each format
provides an API using a common set of names there can be only one format active
within a compilation unit. This design avoids function pointers around the low
level API.

Instead the function pointers can end up at the higher level API (i.e.
map/unmap, etc.) and the per-format code can be directly inlined into the
per-format compilation unit. For something like IOMMU each format will be
compiled into a per-format IOMMU operations kernel module.

For this to work the .c file for each compilation unit will include both the
format headers and the generic code for the implementation. For instance in an
implementation compilation unit the headers would normally be included as
follows:

generic_pt/fmt/iommu_amdv1.c::

  #include <linux/generic_pt/common.h>
  #include "defs_amdv1.h"
  #include "../pt_defs.h"
  #include "amdv1.h"
  #include "../pt_common.h"
  #include "../pt_iter.h"
  #include "../iommu_pt.h"    /* The IOMMU implementation */

iommu_pt.h includes definitions that will generate the operations functions for
map/unmap/etc. using the definitions provided by AMDv1. The resulting module
will have exported symbols named like pt_iommu_amdv1_init().

Refer to drivers/iommu/generic_pt/fmt/iommu_template.h for an example of how
the IOMMU implementation uses multi-compilation to generate per-format ops
struct pointers.

The format code is written so that the common names arise from #defines to
distinct format-specific names. This is intended to aid debuggability by
avoiding symbol clashes across all the different formats.

Exported symbols and other global names are mangled using a per-format string
via the NS() helper macro.
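The actual NS() definition lives in the generic_pt headers and is not part of
this page; purely as an illustration of the idea (the helper and prefix names
below are made up for this sketch, they are not the kernel's), per-format name
mangling of this kind is usually done with token pasting::

  /* Illustrative sketch only -- not the real NS() from generic_pt. */
  #define PT_NS_PASTE2(prefix, name)  prefix##_##name
  #define PT_NS_PASTE(prefix, name)   PT_NS_PASTE2(prefix, name)
  /* PT_FMT_PREFIX is a hypothetical per-format prefix, e.g. pt_iommu_amdv1 */
  #define NS(name)                    PT_NS_PASTE(PT_FMT_PREFIX, name)

  /*
   * With PT_FMT_PREFIX defined as pt_iommu_amdv1, NS(init) expands to
   * pt_iommu_amdv1_init, giving each format its own symbol names.
   */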
The format uses struct pt_common as the top-level struct for the table,
and each format will have its own struct pt_xxx which embeds it to store
format-specific information.

The implementation will further wrap struct pt_common in its own top-level
struct, such as struct pt_iommu_amdv1.
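As a concrete example of such wrapping, the AMD driver conversion later in
this merge embeds the per-format top-level structs in a union with the core
struct iommu_domain (condensed from the amd_iommu_types.h hunk in this diff;
the internal layout of the generic-PT structs themselves is not shown here)::

  struct protection_domain {
          union {
                  struct iommu_domain domain;
                  struct pt_iommu iommu;
                  struct pt_iommu_amdv1 amdv1;
                  struct pt_iommu_x86_64 amdv2;
          };
          /* ... driver-specific members ... */
  };
  PT_IOMMU_CHECK_DOMAIN(struct protection_domain, amdv1.iommu, domain);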
Format functions at the struct pt_common level
----------------------------------------------

.. kernel-doc:: include/linux/generic_pt/common.h
   :identifiers:
.. kernel-doc:: drivers/iommu/generic_pt/pt_common.h

Iteration Helpers
-----------------

.. kernel-doc:: drivers/iommu/generic_pt/pt_iter.h

Writing a Format
----------------

It is best to start from a simple format that is similar to the target. x86_64
is usually a good reference for something simple, and AMDv1 is something fairly
complete.

The required inline functions need to be implemented in the format header.
These should all follow the standard pattern of::

  static inline pt_oaddr_t amdv1pt_entry_oa(const struct pt_state *pts)
  {
          [..]
  }
  #define pt_entry_oa amdv1pt_entry_oa

where a uniquely named per-format inline function provides the implementation
and a define maps it to the generic name. This is intended to make debug
symbols work better. Inline functions should always be used, as the prototypes
in pt_common.h will cause the compiler to validate the function signature and
prevent errors.

Review pt_fmt_defaults.h to understand some of the optional inlines.

Once the format compiles it should be run through the generic page table kunit
test in kunit_generic_pt.h. For example::

  $ tools/testing/kunit/kunit.py run --build_dir build_kunit_x86_64 --arch x86_64 --kunitconfig ./drivers/iommu/generic_pt/.kunitconfig amdv1_fmt_test.*
  [...]
  [11:15:08] Testing complete. Ran 9 tests: passed: 9
  [11:15:09] Elapsed time: 3.137s total, 0.001s configuring, 2.368s building, 0.311s running

The generic tests are intended to prove out the format functions and give
clearer failures to speed up finding the problems. Once those pass, the entire
kunit suite should be run.

IOMMU Invalidation Features
---------------------------

Invalidation is how the page table algorithms synchronize with a HW cache of
the page table memory, typically called the TLB (or IOTLB for IOMMU cases).

The TLB can store present PTEs, non-present PTEs and table pointers, depending
on its design. Every HW has its own approach to describing what has changed so
that the changed items can be removed from the TLB.

PT_FEAT_FLUSH_RANGE
~~~~~~~~~~~~~~~~~~~

PT_FEAT_FLUSH_RANGE is the easiest scheme to understand. It tries to generate a
single range invalidation for each operation, over-invalidating if there are
gaps of VA that don't need invalidation. This trades off impacted VA for the
number of invalidation operations. It does not keep track of what is being
invalidated; however, if pages have to be freed then page table pointers have
to be cleaned from the walk cache. The range can start/end at any page
boundary.

PT_FEAT_FLUSH_RANGE_NO_GAPS
~~~~~~~~~~~~~~~~~~~~~~~~~~~

PT_FEAT_FLUSH_RANGE_NO_GAPS is similar to PT_FEAT_FLUSH_RANGE; however, it
tries to minimize the amount of impacted VA by issuing extra flush operations.
This is useful if the cost of processing VA is very high, for instance because
a hypervisor is processing the page table with a shadowing algorithm.
Documentation/driver-api/index.rst (@@ -93,6 +93,7 @@ Subsystem-specific APIs):
add a generic_pt entry to the subsystem API index.

MAINTAINERS: in the ACPI VIOT DRIVER, ARM SMMU SVA SUPPORT and VIRTIO IOMMU
DRIVER entries, Jean-Philippe Brucker's address changes from
<jean-philippe@linaro.org> to <jpb@kernel.org>.
arch/powerpc/include/asm/mem_encrypt.h (@@ -9,6 +9,9 @@): include
<linux/types.h> and forward-declare struct device next to the existing
<asm/svm.h> include, so the header is self-contained.

arch/powerpc/kernel/iommu.c: spapr_tce_platform_iommu_attach_dev() and
spapr_tce_blocked_iommu_attach_dev() take an additional
"struct iommu_domain *old" argument for the previously attached domain, in
line with the updated attach callback signature used elsewhere in this merge
(see the excerpt below).
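Because the viewer shows the old and new lines of that hunk interleaved, here
is the new parameter list on its own, condensed from the hunk above into
prototype form (the function bodies are unchanged apart from the signature):

  static int
  spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
                                      struct device *dev,
                                      struct iommu_domain *old);

spapr_tce_blocked_iommu_attach_dev() gains the same trailing "old" parameter.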
drivers/amba/Kconfig (@@ -5,7 +5,7 @@ config ARM_AMBA): TEGRA_AHB changes from
a silent bool to a visible option, 'bool "Enable AHB driver for NVIDIA Tegra
SoCs" if COMPILE_TEST' with 'default y if ARCH_TEGRA'.
drivers/iommu/Kconfig: IOMMU_IO_PGTABLE_LPAE_SELFTEST (bool "LPAE selftests",
run at boot) is replaced by IOMMU_IO_PGTABLE_LPAE_KUNIT_TEST (tristate "KUnit
tests for LPAE", depends on IOMMU_IO_PGTABLE_LPAE && KUNIT, default
KUNIT_ALL_TESTS); TEGRA_IOMMU_SMMU can now be built with COMPILE_TEST in
addition to ARCH_TEGRA; drivers/iommu/generic_pt/Kconfig is sourced after the
IOMMU_SUPPORT block.

drivers/iommu/Makefile: add obj-$(CONFIG_GENERIC_PT) += generic_pt/fmt/ and
build the LPAE kunit tests as a separate io-pgtable-arm-selftests.o object
under CONFIG_IOMMU_IO_PGTABLE_LPAE_KUNIT_TEST.
drivers/iommu/amd/Kconfig (@@ -11,10 +11,13 @@ config AMD_IOMMU): AMD_IOMMU now
selects GENERIC_PT, IOMMU_PT, IOMMU_PT_AMDV1 and IOMMU_PT_X86_64 instead of
IOMMU_IO_PGTABLE.

drivers/iommu/amd/Makefile: drop io_pgtable.o and io_pgtable_v2.o from the
objects built into the driver.

drivers/iommu/amd/amd_iommu.h (@@ -88,7 +88,6 @@): one prototype in the
cache/TLB flush helper block goes away.

drivers/iommu/amd/amd_iommu_types.h: replace the <linux/io-pgtable.h> include
with <linux/generic_pt/iommu.h>; add MMIO_CMD_HEAD_MASK/MMIO_CMD_TAIL_MASK and
the MMIO_CMD_BUFFER_HEAD()/MMIO_CMD_BUFFER_TAIL() FIELD_GET() helpers for the
command buffer head/tail registers; remove the legacy v1 page-table walk
macros (PM_LEVEL_*, PAGE_SIZE_*, PTE_PAGE_SIZE(), PTE_LEVEL_PAGE_SIZE() and
friends), the io_pgtable_to_data()/io_pgtable_ops_to_domain() container_of()
helpers and struct amd_io_pgtable; struct protection_domain now embeds the
generic-PT table structs in a union with struct iommu_domain (struct pt_iommu,
struct pt_iommu_amdv1, struct pt_iommu_x86_64, checked with
PT_IOMMU_CHECK_DOMAIN()) and loses its struct amd_io_pgtable iop member.
drivers/iommu/amd/debugfs.c (@@ -37,7 +37,7 @@): bound the MMIO offset check
with sizeof(u64) instead of a bare 4.

drivers/iommu/amd/init.c (@@ -1710,13 +1710,22 @@): alloc_pci_segment() no
longer leaks on failure; instead of returning NULL when the device, alias or
rlookup table allocation fails, it unwinds what was already set up via goto
labels, as shown in the condensed error path below.
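Since the viewer interleaves the old "return NULL;" lines with their
replacements, the new error handling of alloc_pci_segment() reads, condensed
from the hunk above:

          if (alloc_dev_table(pci_seg))
                  goto err_free_pci_seg;
          if (alloc_alias_table(pci_seg))
                  goto err_free_dev_table;
          if (alloc_rlookup_table(pci_seg))
                  goto err_free_alias_table;

          return pci_seg;

  err_free_alias_table:
          free_alias_table(pci_seg);
  err_free_dev_table:
          free_dev_table(pci_seg);
  err_free_pci_seg:
          list_del(&pci_seg->list);
          kfree(pci_seg);
          return NULL;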
drivers/iommu/amd/io_pgtable.c: deleted (577 lines) - the AMD v1 io-pgtable
allocator (first_pte_l7(), free_sub_pt(), increase_address_space(),
alloc_pte(), fetch_pte(), the iommu_v1_map_pages()/iommu_v1_unmap_pages()/
iommu_v1_iova_to_phys() ops, the host-dirty bit handling and
io_pgtable_amd_iommu_v1_init_fns).

drivers/iommu/amd/io_pgtable_v2.c: deleted (370 lines) - the AMD v2
(x86-64-style) io-pgtable allocator (v2_alloc_pte(), fetch_pte(), the
iommu_v2_map_pages()/iommu_v2_unmap_pages()/iommu_v2_iova_to_phys() ops and
io_pgtable_amd_iommu_v2_init_fns).

Both page-table implementations are replaced by the generic_pt AMDv1 and
x86_64 formats now selected from the AMD Kconfig above.
@ -30,7 +30,6 @@
|
|||
#include <linux/msi.h>
|
||||
#include <linux/irqdomain.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/io-pgtable.h>
|
||||
#include <linux/cc_platform.h>
|
||||
#include <asm/irq_remapping.h>
|
||||
#include <asm/io_apic.h>
|
||||
|
|
@ -41,9 +40,9 @@
|
|||
#include <asm/gart.h>
|
||||
#include <asm/dma.h>
|
||||
#include <uapi/linux/iommufd.h>
|
||||
#include <linux/generic_pt/iommu.h>
|
||||
|
||||
#include "amd_iommu.h"
|
||||
#include "../dma-iommu.h"
|
||||
#include "../irq_remapping.h"
|
||||
#include "../iommu-pages.h"
|
||||
|
||||
|
|
@ -60,7 +59,6 @@ LIST_HEAD(hpet_map);
|
|||
LIST_HEAD(acpihid_map);
|
||||
|
||||
const struct iommu_ops amd_iommu_ops;
|
||||
static const struct iommu_dirty_ops amd_dirty_ops;
|
||||
|
||||
int amd_iommu_max_glx_val = -1;
|
||||
|
||||
|
|
@@ -70,15 +68,22 @@ int amd_iommu_max_glx_val = -1;
 */
DEFINE_IDA(pdom_ids);

static int amd_iommu_attach_device(struct iommu_domain *dom,
				   struct device *dev);
static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev,
				   struct iommu_domain *old);

static void set_dte_entry(struct amd_iommu *iommu,
			  struct iommu_dev_data *dev_data);
			  struct iommu_dev_data *dev_data,
			  phys_addr_t top_paddr, unsigned int top_level);

static void amd_iommu_change_top(struct pt_iommu *iommu_table,
				 phys_addr_t top_paddr, unsigned int top_level);

static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid);

static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid);
static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain);
static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
					bool enable);

/****************************************************************************
 *
@@ -1157,6 +1162,25 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 *
 ****************************************************************************/

static void dump_command_buffer(struct amd_iommu *iommu)
{
	struct iommu_cmd *cmd;
	u32 head, tail;
	int i;

	head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);

	pr_err("CMD Buffer head=%llu tail=%llu\n", MMIO_CMD_BUFFER_HEAD(head),
	       MMIO_CMD_BUFFER_TAIL(tail));

	for (i = 0; i < CMD_BUFFER_ENTRIES; i++) {
		cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd));
		pr_err("%3d: %08x %08x %08x %08x\n", i, cmd->data[0], cmd->data[1], cmd->data[2],
		       cmd->data[3]);
	}
}

static int wait_on_sem(struct amd_iommu *iommu, u64 data)
{
	int i = 0;

@@ -1167,7 +1191,14 @@ static int wait_on_sem(struct amd_iommu *iommu, u64 data)
	}

	if (i == LOOP_TIMEOUT) {
		pr_alert("Completion-Wait loop timed out\n");
		pr_alert("IOMMU %04x:%02x:%02x.%01x: Completion-Wait loop timed out\n",
			 iommu->pci_seg->id, PCI_BUS_NUM(iommu->devid),
			 PCI_SLOT(iommu->devid), PCI_FUNC(iommu->devid));

		if (amd_iommu_dump)
			DO_ONCE_LITE(dump_command_buffer, iommu);

		return -EIO;
	}

|
@ -1756,42 +1787,6 @@ static void dev_flush_pasid_all(struct iommu_dev_data *dev_data,
|
|||
CMD_INV_IOMMU_ALL_PAGES_ADDRESS);
|
||||
}
|
||||
|
||||
/* Flush the not present cache if it exists */
|
||||
static void domain_flush_np_cache(struct protection_domain *domain,
|
||||
dma_addr_t iova, size_t size)
|
||||
{
|
||||
if (unlikely(amd_iommu_np_cache)) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&domain->lock, flags);
|
||||
amd_iommu_domain_flush_pages(domain, iova, size);
|
||||
spin_unlock_irqrestore(&domain->lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* This function flushes the DTEs for all devices in domain
|
||||
*/
|
||||
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
|
||||
{
|
||||
struct iommu_dev_data *dev_data;
|
||||
|
||||
lockdep_assert_held(&domain->lock);
|
||||
|
||||
list_for_each_entry(dev_data, &domain->dev_list, list) {
|
||||
struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev);
|
||||
|
||||
set_dte_entry(iommu, dev_data);
|
||||
clone_aliases(iommu, dev_data->dev);
|
||||
}
|
||||
|
||||
list_for_each_entry(dev_data, &domain->dev_list, list)
|
||||
device_flush_dte(dev_data);
|
||||
|
||||
domain_flush_complete(domain);
|
||||
}
|
||||
|
||||
int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag)
|
||||
{
|
||||
struct iommu_dev_data *dev_data;
|
||||
|
|
@ -2051,7 +2046,8 @@ static void set_dte_gcr3_table(struct amd_iommu *iommu,
|
|||
}
|
||||
|
||||
static void set_dte_entry(struct amd_iommu *iommu,
|
||||
struct iommu_dev_data *dev_data)
|
||||
struct iommu_dev_data *dev_data,
|
||||
phys_addr_t top_paddr, unsigned int top_level)
|
||||
{
|
||||
u16 domid;
|
||||
u32 old_domid;
|
||||
|
|
@ -2060,19 +2056,36 @@ static void set_dte_entry(struct amd_iommu *iommu,
|
|||
struct protection_domain *domain = dev_data->domain;
|
||||
struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
|
||||
struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
|
||||
|
||||
if (gcr3_info && gcr3_info->gcr3_tbl)
|
||||
domid = dev_data->gcr3_info.domid;
|
||||
else
|
||||
domid = domain->id;
|
||||
struct pt_iommu_amdv1_hw_info pt_info;
|
||||
|
||||
make_clear_dte(dev_data, dte, &new);
|
||||
|
||||
if (domain->iop.mode != PAGE_MODE_NONE)
|
||||
new.data[0] |= iommu_virt_to_phys(domain->iop.root);
|
||||
if (gcr3_info && gcr3_info->gcr3_tbl)
|
||||
domid = dev_data->gcr3_info.domid;
|
||||
else {
|
||||
domid = domain->id;
|
||||
|
||||
new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
|
||||
<< DEV_ENTRY_MODE_SHIFT;
|
||||
if (domain->domain.type & __IOMMU_DOMAIN_PAGING) {
|
||||
/*
|
||||
* When updating the IO pagetable, the new top and level
|
||||
* are provided as parameters. For other operations i.e.
|
||||
* device attach, retrieve the current pagetable info
|
||||
* via the IOMMU PT API.
|
||||
*/
|
||||
if (top_paddr) {
|
||||
pt_info.host_pt_root = top_paddr;
|
||||
pt_info.mode = top_level + 1;
|
||||
} else {
|
||||
WARN_ON(top_paddr || top_level);
|
||||
pt_iommu_amdv1_hw_info(&domain->amdv1,
|
||||
&pt_info);
|
||||
}
|
||||
|
||||
new.data[0] |= __sme_set(pt_info.host_pt_root) |
|
||||
(pt_info.mode & DEV_ENTRY_MODE_MASK)
|
||||
<< DEV_ENTRY_MODE_SHIFT;
|
||||
}
|
||||
}
|
||||
|
||||
new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW;
|
||||
|
||||
|
|
@ -2138,7 +2151,7 @@ static void dev_update_dte(struct iommu_dev_data *dev_data, bool set)
|
|||
struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);
|
||||
|
||||
if (set)
|
||||
set_dte_entry(iommu, dev_data);
|
||||
set_dte_entry(iommu, dev_data, 0, 0);
|
||||
else
|
||||
clear_dte_entry(iommu, dev_data);
|
||||
|
||||
|
|
@ -2156,6 +2169,7 @@ static int init_gcr3_table(struct iommu_dev_data *dev_data,
|
|||
{
|
||||
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
|
||||
int max_pasids = dev_data->max_pasids;
|
||||
struct pt_iommu_x86_64_hw_info pt_info;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
|
|
@ -2178,7 +2192,8 @@ static int init_gcr3_table(struct iommu_dev_data *dev_data,
|
|||
if (!pdom_is_v2_pgtbl_mode(pdom))
|
||||
return ret;
|
||||
|
||||
ret = update_gcr3(dev_data, 0, iommu_virt_to_phys(pdom->iop.pgd), true);
|
||||
pt_iommu_x86_64_hw_info(&pdom->amdv2, &pt_info);
|
||||
ret = update_gcr3(dev_data, 0, __sme_set(pt_info.gcr3_pt), true);
|
||||
if (ret)
|
||||
free_gcr3_table(&dev_data->gcr3_info);
|
||||
|
||||
|
|
@ -2500,54 +2515,6 @@ struct protection_domain *protection_domain_alloc(void)
|
|||
return domain;
|
||||
}
|
||||
|
||||
static int pdom_setup_pgtable(struct protection_domain *domain,
|
||||
struct device *dev)
|
||||
{
|
||||
struct io_pgtable_ops *pgtbl_ops;
|
||||
enum io_pgtable_fmt fmt;
|
||||
|
||||
switch (domain->pd_mode) {
|
||||
case PD_MODE_V1:
|
||||
fmt = AMD_IOMMU_V1;
|
||||
break;
|
||||
case PD_MODE_V2:
|
||||
fmt = AMD_IOMMU_V2;
|
||||
break;
|
||||
case PD_MODE_NONE:
|
||||
WARN_ON_ONCE(1);
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev);
|
||||
pgtbl_ops = alloc_io_pgtable_ops(fmt, &domain->iop.pgtbl.cfg, domain);
|
||||
if (!pgtbl_ops)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline u64 dma_max_address(enum protection_domain_mode pgtable)
{
	if (pgtable == PD_MODE_V1)
		return PM_LEVEL_SIZE(amd_iommu_hpt_level);

	/*
	 * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page
	 * Translation" shows that the V2 table sign extends the top of the
	 * address space creating a reserved region in the middle of the
	 * translation, just like the CPU does. Further Vasant says the docs are
	 * incomplete and this only applies to non-zero PASIDs. If the AMDv2
	 * page table is assigned to the 0 PASID then there is no sign extension
	 * check.
	 *
	 * Since the IOMMU must have a fixed geometry, and the core code does
	 * not understand sign extended addressing, we have to chop off the high
	 * bit to get consistent behavior with attachments of the domain to any
	 * PASID.
	 */
	return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1);
}
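/*
 * Editor's note, not part of the patch: a quick numeric check of the comment
 * above, assuming PM_LEVEL_SHIFT(x) expands to 12 + 9 * x as in the AMD
 * driver headers.
 *
 *   5-level GPT (PAGE_MODE_5_LEVEL == 5): PM_LEVEL_SHIFT(5) == 57, so
 *   dma_max_address() == (1ULL << 56) - 1, a 56-bit IOVA space with the
 *   sign-extension bit chopped off.
 *   4-level GPT (PAGE_MODE_4_LEVEL == 4): PM_LEVEL_SHIFT(4) == 48, giving
 *   (1ULL << 47) - 1, i.e. 47 bits.
 *
 * These line up with the hw_max_vasz_lg2 values (56/47) that the V2 paging
 * domain allocation configures further down.
 */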
|
||||
|
||||
static bool amd_iommu_hd_support(struct amd_iommu *iommu)
|
||||
{
|
||||
if (amd_iommu_hatdis)
|
||||
|
|
@ -2556,38 +2523,232 @@ static bool amd_iommu_hd_support(struct amd_iommu *iommu)
|
|||
return iommu && (iommu->features & FEATURE_HDSUP);
|
||||
}
|
||||
|
||||
static struct iommu_domain *
|
||||
do_iommu_domain_alloc(struct device *dev, u32 flags,
|
||||
enum protection_domain_mode pgtable)
|
||||
static spinlock_t *amd_iommu_get_top_lock(struct pt_iommu *iommupt)
|
||||
{
|
||||
bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
|
||||
struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
|
||||
struct protection_domain *pdom =
|
||||
container_of(iommupt, struct protection_domain, iommu);
|
||||
|
||||
return &pdom->lock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update all HW references to the domain with a new pgtable configuration.
|
||||
*/
|
||||
static void amd_iommu_change_top(struct pt_iommu *iommu_table,
|
||||
phys_addr_t top_paddr, unsigned int top_level)
|
||||
{
|
||||
struct protection_domain *pdom =
|
||||
container_of(iommu_table, struct protection_domain, iommu);
|
||||
struct iommu_dev_data *dev_data;
|
||||
|
||||
lockdep_assert_held(&pdom->lock);
|
||||
|
||||
/* Update the DTE for all devices attached to this domain */
|
||||
list_for_each_entry(dev_data, &pdom->dev_list, list) {
|
||||
struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev);
|
||||
|
||||
/* Update the HW references with the new level and top ptr */
|
||||
set_dte_entry(iommu, dev_data, top_paddr, top_level);
|
||||
clone_aliases(iommu, dev_data->dev);
|
||||
}
|
||||
|
||||
list_for_each_entry(dev_data, &pdom->dev_list, list)
|
||||
device_flush_dte(dev_data);
|
||||
|
||||
domain_flush_complete(pdom);
|
||||
}
|
||||
|
||||
/*
|
||||
* amd_iommu_iotlb_sync_map() is used to generate flushes for non-present to
|
||||
* present (ie mapping) operations. It is a NOP if the IOMMU doesn't have non
|
||||
* present caching (like hypervisor shadowing).
|
||||
*/
|
||||
static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
|
||||
unsigned long iova, size_t size)
|
||||
{
|
||||
struct protection_domain *domain = to_pdomain(dom);
|
||||
unsigned long flags;
|
||||
|
||||
if (likely(!amd_iommu_np_cache))
|
||||
return 0;
|
||||
|
||||
spin_lock_irqsave(&domain->lock, flags);
|
||||
amd_iommu_domain_flush_pages(domain, iova, size);
|
||||
spin_unlock_irqrestore(&domain->lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
|
||||
{
|
||||
struct protection_domain *dom = to_pdomain(domain);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dom->lock, flags);
|
||||
amd_iommu_domain_flush_all(dom);
|
||||
spin_unlock_irqrestore(&dom->lock, flags);
|
||||
}
|
||||
|
||||
static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
|
||||
struct iommu_iotlb_gather *gather)
|
||||
{
|
||||
struct protection_domain *dom = to_pdomain(domain);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dom->lock, flags);
|
||||
amd_iommu_domain_flush_pages(dom, gather->start,
|
||||
gather->end - gather->start + 1);
|
||||
spin_unlock_irqrestore(&dom->lock, flags);
|
||||
iommu_put_pages_list(&gather->freelist);
|
||||
}
|
||||
|
||||
static const struct pt_iommu_driver_ops amd_hw_driver_ops_v1 = {
|
||||
.get_top_lock = amd_iommu_get_top_lock,
|
||||
.change_top = amd_iommu_change_top,
|
||||
};
|
||||
|
||||
static const struct iommu_domain_ops amdv1_ops = {
|
||||
IOMMU_PT_DOMAIN_OPS(amdv1),
|
||||
.iotlb_sync_map = amd_iommu_iotlb_sync_map,
|
||||
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
|
||||
.iotlb_sync = amd_iommu_iotlb_sync,
|
||||
.attach_dev = amd_iommu_attach_device,
|
||||
.free = amd_iommu_domain_free,
|
||||
.enforce_cache_coherency = amd_iommu_enforce_cache_coherency,
|
||||
};
|
||||
|
||||
static const struct iommu_dirty_ops amdv1_dirty_ops = {
|
||||
IOMMU_PT_DIRTY_OPS(amdv1),
|
||||
.set_dirty_tracking = amd_iommu_set_dirty_tracking,
|
||||
};
|
||||
|
||||
static struct iommu_domain *amd_iommu_domain_alloc_paging_v1(struct device *dev,
|
||||
u32 flags)
|
||||
{
|
||||
struct pt_iommu_amdv1_cfg cfg = {};
|
||||
struct protection_domain *domain;
|
||||
int ret;
|
||||
|
||||
if (amd_iommu_hatdis)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
domain = protection_domain_alloc();
|
||||
if (!domain)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
domain->pd_mode = pgtable;
|
||||
ret = pdom_setup_pgtable(domain, dev);
|
||||
domain->pd_mode = PD_MODE_V1;
|
||||
domain->iommu.driver_ops = &amd_hw_driver_ops_v1;
|
||||
domain->iommu.nid = dev_to_node(dev);
|
||||
if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
|
||||
domain->domain.dirty_ops = &amdv1_dirty_ops;
|
||||
|
||||
/*
|
||||
* Someday FORCE_COHERENCE should be set by
|
||||
* amd_iommu_enforce_cache_coherency() like VT-d does.
|
||||
*/
|
||||
cfg.common.features = BIT(PT_FEAT_DYNAMIC_TOP) |
|
||||
BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) |
|
||||
BIT(PT_FEAT_AMDV1_FORCE_COHERENCE);
|
||||
|
||||
/*
|
||||
* AMD's IOMMU can flush as many pages as necessary in a single flush.
|
||||
* Unless we run in a virtual machine, which can be inferred according
|
||||
* to whether "non-present cache" is on, it is probably best to prefer
|
||||
* (potentially) too extensive TLB flushing (i.e., more misses) over
|
||||
* multiple TLB flushes (i.e., more flushes). For virtual machines the
|
||||
* hypervisor needs to synchronize the host IOMMU PTEs with those of
|
||||
* the guest, and the trade-off is different: unnecessary TLB flushes
|
||||
* should be avoided.
|
||||
*/
|
||||
if (amd_iommu_np_cache)
|
||||
cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE_NO_GAPS);
|
||||
else
|
||||
cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE);
|
||||
|
||||
cfg.common.hw_max_vasz_lg2 =
|
||||
min(64, (amd_iommu_hpt_level - 1) * 9 + 21);
|
||||
cfg.common.hw_max_oasz_lg2 = 52;
|
||||
cfg.starting_level = 2;
|
||||
domain->domain.ops = &amdv1_ops;
|
||||
|
||||
ret = pt_iommu_amdv1_init(&domain->amdv1, &cfg, GFP_KERNEL);
|
||||
if (ret) {
|
||||
pdom_id_free(domain->id);
|
||||
kfree(domain);
|
||||
amd_iommu_domain_free(&domain->domain);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
domain->domain.geometry.aperture_start = 0;
|
||||
domain->domain.geometry.aperture_end = dma_max_address(pgtable);
|
||||
domain->domain.geometry.force_aperture = true;
|
||||
domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap;
|
||||
/*
|
||||
* Narrow the supported page sizes to those selected by the kernel
|
||||
* command line.
|
||||
*/
|
||||
domain->domain.pgsize_bitmap &= amd_iommu_pgsize_bitmap;
|
||||
return &domain->domain;
|
||||
}
|
||||
|
||||
domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
|
||||
domain->domain.ops = iommu->iommu.ops->default_domain_ops;
|
||||
static const struct iommu_domain_ops amdv2_ops = {
|
||||
IOMMU_PT_DOMAIN_OPS(x86_64),
|
||||
.iotlb_sync_map = amd_iommu_iotlb_sync_map,
|
||||
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
|
||||
.iotlb_sync = amd_iommu_iotlb_sync,
|
||||
.attach_dev = amd_iommu_attach_device,
|
||||
.free = amd_iommu_domain_free,
|
||||
/*
|
||||
* Note the AMDv2 page table format does not support a Force Coherency
|
||||
* bit, so enforce_cache_coherency should not be set. However VFIO is
|
||||
* not prepared to handle a case where some domains will support
|
||||
* enforcement and others do not. VFIO and iommufd will have to be fixed
|
||||
* before it can fully use the V2 page table. See the comment in
|
||||
* iommufd_hwpt_paging_alloc(). For now leave things as they have
|
||||
* historically been and lie about enforce_cache_coherencey.
|
||||
*/
|
||||
.enforce_cache_coherency = amd_iommu_enforce_cache_coherency,
|
||||
};
|
||||
|
||||
if (dirty_tracking)
|
||||
domain->domain.dirty_ops = &amd_dirty_ops;
|
||||
static struct iommu_domain *amd_iommu_domain_alloc_paging_v2(struct device *dev,
|
||||
u32 flags)
|
||||
{
|
||||
struct pt_iommu_x86_64_cfg cfg = {};
|
||||
struct protection_domain *domain;
|
||||
int ret;
|
||||
|
||||
if (!amd_iommu_v2_pgtbl_supported())
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
domain = protection_domain_alloc();
|
||||
if (!domain)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
domain->pd_mode = PD_MODE_V2;
|
||||
domain->iommu.nid = dev_to_node(dev);
|
||||
|
||||
cfg.common.features = BIT(PT_FEAT_X86_64_AMD_ENCRYPT_TABLES);
|
||||
if (amd_iommu_np_cache)
|
||||
cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE_NO_GAPS);
|
||||
else
|
||||
cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE);
|
||||
|
||||
/*
|
||||
* The v2 table behaves differently if it is attached to PASID 0 vs a
|
||||
* non-zero PASID. On PASID 0 it has no sign extension and the full
|
||||
* 57/48 bits decode the lower addresses. Otherwise it behaves like a
|
||||
* normal sign extended x86 page table. Since we want the domain to work
|
||||
* in both modes the top bit is removed and PT_FEAT_SIGN_EXTEND is not
|
||||
* set which creates a table that is compatible in both modes.
|
||||
*/
|
||||
if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
|
||||
cfg.common.hw_max_vasz_lg2 = 56;
|
||||
cfg.top_level = 4;
|
||||
} else {
|
||||
cfg.common.hw_max_vasz_lg2 = 47;
|
||||
cfg.top_level = 3;
|
||||
}
|
||||
cfg.common.hw_max_oasz_lg2 = 52;
|
||||
domain->domain.ops = &amdv2_ops;
|
||||
|
||||
ret = pt_iommu_x86_64_init(&domain->amdv2, &cfg, GFP_KERNEL);
|
||||
if (ret) {
|
||||
amd_iommu_domain_free(&domain->domain);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
return &domain->domain;
|
||||
}
|
||||
|
||||
|
|
@ -2608,15 +2769,27 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
|
|||
/* Allocate domain with v1 page table for dirty tracking */
|
||||
if (!amd_iommu_hd_support(iommu))
|
||||
break;
|
||||
return do_iommu_domain_alloc(dev, flags, PD_MODE_V1);
|
||||
return amd_iommu_domain_alloc_paging_v1(dev, flags);
|
||||
case IOMMU_HWPT_ALLOC_PASID:
|
||||
/* Allocate domain with v2 page table if IOMMU supports PASID. */
|
||||
if (!amd_iommu_pasid_supported())
|
||||
break;
|
||||
return do_iommu_domain_alloc(dev, flags, PD_MODE_V2);
|
||||
case 0:
|
||||
return amd_iommu_domain_alloc_paging_v2(dev, flags);
|
||||
case 0: {
|
||||
struct iommu_domain *ret;
|
||||
|
||||
/* If nothing specific is required use the kernel commandline default */
|
||||
return do_iommu_domain_alloc(dev, 0, amd_iommu_pgtable);
|
||||
if (amd_iommu_pgtable == PD_MODE_V1) {
|
||||
ret = amd_iommu_domain_alloc_paging_v1(dev, flags);
|
||||
if (ret != ERR_PTR(-EOPNOTSUPP))
|
||||
return ret;
|
||||
return amd_iommu_domain_alloc_paging_v2(dev, flags);
|
||||
}
|
||||
ret = amd_iommu_domain_alloc_paging_v2(dev, flags);
|
||||
if (ret != ERR_PTR(-EOPNOTSUPP))
|
||||
return ret;
|
||||
return amd_iommu_domain_alloc_paging_v1(dev, flags);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -2628,14 +2801,14 @@ void amd_iommu_domain_free(struct iommu_domain *dom)
|
|||
struct protection_domain *domain = to_pdomain(dom);
|
||||
|
||||
WARN_ON(!list_empty(&domain->dev_list));
|
||||
if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
|
||||
free_io_pgtable_ops(&domain->iop.pgtbl.ops);
|
||||
pt_iommu_deinit(&domain->iommu);
|
||||
pdom_id_free(domain->id);
|
||||
kfree(domain);
|
||||
}
|
||||
|
||||
static int blocked_domain_attach_device(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
|
||||
|
||||
|
|
@ -2685,16 +2858,8 @@ void amd_iommu_init_identity_domain(void)
|
|||
protection_domain_init(&identity_domain);
|
||||
}
|
||||
|
||||
/* Same as blocked domain except it supports only ops->attach_dev() */
|
||||
static struct iommu_domain release_domain = {
|
||||
.type = IOMMU_DOMAIN_BLOCKED,
|
||||
.ops = &(const struct iommu_domain_ops) {
|
||||
.attach_dev = blocked_domain_attach_device,
|
||||
}
|
||||
};
|
||||
|
||||
static int amd_iommu_attach_device(struct iommu_domain *dom,
|
||||
struct device *dev)
|
||||
static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
|
||||
struct protection_domain *domain = to_pdomain(dom);
|
||||
|
|
@ -2734,93 +2899,6 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
|
||||
unsigned long iova, size_t size)
|
||||
{
|
||||
struct protection_domain *domain = to_pdomain(dom);
|
||||
struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
|
||||
|
||||
if (ops->map_pages)
|
||||
domain_flush_np_cache(domain, iova, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova,
|
||||
phys_addr_t paddr, size_t pgsize, size_t pgcount,
|
||||
int iommu_prot, gfp_t gfp, size_t *mapped)
|
||||
{
|
||||
struct protection_domain *domain = to_pdomain(dom);
|
||||
struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
|
||||
int prot = 0;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if ((domain->pd_mode == PD_MODE_V1) &&
|
||||
(domain->iop.mode == PAGE_MODE_NONE))
|
||||
return -EINVAL;
|
||||
|
||||
if (iommu_prot & IOMMU_READ)
|
||||
prot |= IOMMU_PROT_IR;
|
||||
if (iommu_prot & IOMMU_WRITE)
|
||||
prot |= IOMMU_PROT_IW;
|
||||
|
||||
if (ops->map_pages) {
|
||||
ret = ops->map_pages(ops, iova, paddr, pgsize,
|
||||
pgcount, prot, gfp, mapped);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain,
|
||||
struct iommu_iotlb_gather *gather,
|
||||
unsigned long iova, size_t size)
|
||||
{
|
||||
/*
|
||||
* AMD's IOMMU can flush as many pages as necessary in a single flush.
|
||||
* Unless we run in a virtual machine, which can be inferred according
|
||||
* to whether "non-present cache" is on, it is probably best to prefer
|
||||
* (potentially) too extensive TLB flushing (i.e., more misses) over
|
||||
* mutliple TLB flushes (i.e., more flushes). For virtual machines the
|
||||
* hypervisor needs to synchronize the host IOMMU PTEs with those of
|
||||
* the guest, and the trade-off is different: unnecessary TLB flushes
|
||||
* should be avoided.
|
||||
*/
|
||||
if (amd_iommu_np_cache &&
|
||||
iommu_iotlb_gather_is_disjoint(gather, iova, size))
|
||||
iommu_iotlb_sync(domain, gather);
|
||||
|
||||
iommu_iotlb_gather_add_range(gather, iova, size);
|
||||
}
|
||||
|
||||
static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova,
|
||||
size_t pgsize, size_t pgcount,
|
||||
struct iommu_iotlb_gather *gather)
|
||||
{
|
||||
struct protection_domain *domain = to_pdomain(dom);
|
||||
struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
|
||||
size_t r;
|
||||
|
||||
if ((domain->pd_mode == PD_MODE_V1) &&
|
||||
(domain->iop.mode == PAGE_MODE_NONE))
|
||||
return 0;
|
||||
|
||||
r = (ops->unmap_pages) ? ops->unmap_pages(ops, iova, pgsize, pgcount, NULL) : 0;
|
||||
|
||||
if (r)
|
||||
amd_iommu_iotlb_gather_add_page(dom, gather, iova, r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
|
||||
dma_addr_t iova)
|
||||
{
|
||||
struct protection_domain *domain = to_pdomain(dom);
|
||||
struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
|
||||
|
||||
return ops->iova_to_phys(ops, iova);
|
||||
}
|
||||
|
||||
static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
|
||||
{
|
||||
switch (cap) {
|
||||
|
|
@ -2887,28 +2965,6 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain,
|
||||
unsigned long iova, size_t size,
|
||||
unsigned long flags,
|
||||
struct iommu_dirty_bitmap *dirty)
|
||||
{
|
||||
struct protection_domain *pdomain = to_pdomain(domain);
|
||||
struct io_pgtable_ops *ops = &pdomain->iop.pgtbl.ops;
|
||||
unsigned long lflags;
|
||||
|
||||
if (!ops || !ops->read_and_clear_dirty)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
spin_lock_irqsave(&pdomain->lock, lflags);
|
||||
if (!pdomain->dirty_tracking && dirty->bitmap) {
|
||||
spin_unlock_irqrestore(&pdomain->lock, lflags);
|
||||
return -EINVAL;
|
||||
}
|
||||
spin_unlock_irqrestore(&pdomain->lock, lflags);
|
||||
|
||||
return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
|
||||
}
|
||||
|
||||
static void amd_iommu_get_resv_regions(struct device *dev,
|
||||
struct list_head *head)
|
||||
{
|
||||
|
|
@ -2978,28 +3034,6 @@ static bool amd_iommu_is_attach_deferred(struct device *dev)
|
|||
return dev_data->defer_attach;
|
||||
}
|
||||
|
||||
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
|
||||
{
|
||||
struct protection_domain *dom = to_pdomain(domain);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dom->lock, flags);
|
||||
amd_iommu_domain_flush_all(dom);
|
||||
spin_unlock_irqrestore(&dom->lock, flags);
|
||||
}
|
||||
|
||||
static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
|
||||
struct iommu_iotlb_gather *gather)
|
||||
{
|
||||
struct protection_domain *dom = to_pdomain(domain);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dom->lock, flags);
|
||||
amd_iommu_domain_flush_pages(dom, gather->start,
|
||||
gather->end - gather->start + 1);
|
||||
spin_unlock_irqrestore(&dom->lock, flags);
|
||||
}
|
||||
|
||||
static int amd_iommu_def_domain_type(struct device *dev)
|
||||
{
|
||||
struct iommu_dev_data *dev_data;
|
||||
|
|
@ -3034,15 +3068,10 @@ static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain)
|
|||
return true;
|
||||
}
|
||||
|
||||
static const struct iommu_dirty_ops amd_dirty_ops = {
|
||||
.set_dirty_tracking = amd_iommu_set_dirty_tracking,
|
||||
.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
|
||||
};
|
||||
|
||||
const struct iommu_ops amd_iommu_ops = {
|
||||
.capable = amd_iommu_capable,
|
||||
.blocked_domain = &blocked_domain,
|
||||
.release_domain = &release_domain,
|
||||
.release_domain = &blocked_domain,
|
||||
.identity_domain = &identity_domain.domain,
|
||||
.domain_alloc_paging_flags = amd_iommu_domain_alloc_paging_flags,
|
||||
.domain_alloc_sva = amd_iommu_domain_alloc_sva,
|
||||
|
|
@ -3053,17 +3082,6 @@ const struct iommu_ops amd_iommu_ops = {
|
|||
.is_attach_deferred = amd_iommu_is_attach_deferred,
|
||||
.def_domain_type = amd_iommu_def_domain_type,
|
||||
.page_response = amd_iommu_page_response,
|
||||
.default_domain_ops = &(const struct iommu_domain_ops) {
|
||||
.attach_dev = amd_iommu_attach_device,
|
||||
.map_pages = amd_iommu_map_pages,
|
||||
.unmap_pages = amd_iommu_unmap_pages,
|
||||
.iotlb_sync_map = amd_iommu_iotlb_sync_map,
|
||||
.iova_to_phys = amd_iommu_iova_to_phys,
|
||||
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
|
||||
.iotlb_sync = amd_iommu_iotlb_sync,
|
||||
.free = amd_iommu_domain_free,
|
||||
.enforce_cache_coherency = amd_iommu_enforce_cache_coherency,
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef CONFIG_IRQ_REMAP
|
||||
|
|
@@ -3354,7 +3372,7 @@ static int __modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
			  struct irte_ga *irte)
{
	bool ret;
	int ret;

	ret = __modify_irte_ga(iommu, devid, index, irte);
	if (ret)

@@ -4072,3 +4090,5 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
	return 0;
}
#endif

MODULE_IMPORT_NS("GENERIC_PT_IOMMU");
|
|
|
|||
|
|
@ -672,7 +672,8 @@ static int apple_dart_domain_add_streams(struct apple_dart_domain *domain,
|
|||
}
|
||||
|
||||
static int apple_dart_attach_dev_paging(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
int ret, i;
|
||||
struct apple_dart_stream_map *stream_map;
|
||||
|
|
@ -693,7 +694,8 @@ static int apple_dart_attach_dev_paging(struct iommu_domain *domain,
|
|||
}
|
||||
|
||||
static int apple_dart_attach_dev_identity(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
|
||||
struct apple_dart_stream_map *stream_map;
|
||||
|
|
@ -717,7 +719,8 @@ static struct iommu_domain apple_dart_identity_domain = {
|
|||
};
|
||||
|
||||
static int apple_dart_attach_dev_blocked(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
|
||||
struct apple_dart_stream_map *stream_map;
|
||||
|
|
@ -802,6 +805,8 @@ static int apple_dart_of_xlate(struct device *dev,
|
|||
struct apple_dart *cfg_dart;
|
||||
int i, sid;
|
||||
|
||||
put_device(&iommu_pdev->dev);
|
||||
|
||||
if (args->args_count != 1)
|
||||
return -EINVAL;
|
||||
sid = args->args[0];
|
||||
|
|
|
|||
|
|
@ -138,14 +138,15 @@ void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
|
|||
}
|
||||
|
||||
static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old_domain)
|
||||
{
|
||||
struct arm_smmu_nested_domain *nested_domain =
|
||||
to_smmu_nested_domain(domain);
|
||||
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
|
||||
struct arm_smmu_attach_state state = {
|
||||
.master = master,
|
||||
.old_domain = iommu_get_domain_for_dev(dev),
|
||||
.old_domain = old_domain,
|
||||
.ssid = IOMMU_NO_PASID,
|
||||
};
|
||||
struct arm_smmu_ste ste;
|
||||
|
|
|
|||
|
|
@@ -1464,7 +1464,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
	cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
						&cd_table->cdtab_dma,
						GFP_KERNEL);
	if (!cd_table->l2.l2ptrs) {
	if (!cd_table->l2.l1tab) {
		ret = -ENOMEM;
		goto err_free_l2ptrs;
	}
@ -3002,7 +3002,8 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
|
|||
master->ats_enabled = state->ats_enabled;
|
||||
}
|
||||
|
||||
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
||||
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev,
|
||||
struct iommu_domain *old_domain)
|
||||
{
|
||||
int ret = 0;
|
||||
struct arm_smmu_ste target;
|
||||
|
|
@ -3010,7 +3011,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
|||
struct arm_smmu_device *smmu;
|
||||
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
|
||||
struct arm_smmu_attach_state state = {
|
||||
.old_domain = iommu_get_domain_for_dev(dev),
|
||||
.old_domain = old_domain,
|
||||
.ssid = IOMMU_NO_PASID,
|
||||
};
|
||||
struct arm_smmu_master *master;
|
||||
|
|
@ -3186,7 +3187,7 @@ static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
|
|||
|
||||
/*
|
||||
* When the last user of the CD table goes away downgrade the STE back
|
||||
* to a non-cd_table one.
|
||||
* to a non-cd_table one, by re-attaching its sid_domain.
|
||||
*/
|
||||
if (!arm_smmu_ssids_in_use(&master->cd_table)) {
|
||||
struct iommu_domain *sid_domain =
|
||||
|
|
@ -3194,12 +3195,14 @@ static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
|
|||
|
||||
if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
|
||||
sid_domain->type == IOMMU_DOMAIN_BLOCKED)
|
||||
sid_domain->ops->attach_dev(sid_domain, dev);
|
||||
sid_domain->ops->attach_dev(sid_domain, dev,
|
||||
sid_domain);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
|
||||
struct iommu_domain *old_domain,
|
||||
struct device *dev,
|
||||
struct arm_smmu_ste *ste,
|
||||
unsigned int s1dss)
|
||||
|
|
@ -3207,7 +3210,7 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
|
|||
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
|
||||
struct arm_smmu_attach_state state = {
|
||||
.master = master,
|
||||
.old_domain = iommu_get_domain_for_dev(dev),
|
||||
.old_domain = old_domain,
|
||||
.ssid = IOMMU_NO_PASID,
|
||||
};
|
||||
|
||||
|
|
@ -3248,14 +3251,16 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
|
|||
}
|
||||
|
||||
static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old_domain)
|
||||
{
|
||||
struct arm_smmu_ste ste;
|
||||
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
|
||||
|
||||
arm_smmu_master_clear_vmaster(master);
|
||||
arm_smmu_make_bypass_ste(master->smmu, &ste);
|
||||
arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
|
||||
arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
|
||||
STRTAB_STE_1_S1DSS_BYPASS);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -3269,14 +3274,15 @@ static struct iommu_domain arm_smmu_identity_domain = {
|
|||
};
|
||||
|
||||
static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old_domain)
|
||||
{
|
||||
struct arm_smmu_ste ste;
|
||||
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
|
||||
|
||||
arm_smmu_master_clear_vmaster(master);
|
||||
arm_smmu_make_abort_ste(&ste);
|
||||
arm_smmu_attach_dev_ste(domain, dev, &ste,
|
||||
arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
|
||||
STRTAB_STE_1_S1DSS_TERMINATE);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -3582,12 +3588,6 @@ static void arm_smmu_release_device(struct device *dev)
|
|||
|
||||
WARN_ON(master->iopf_refcount);
|
||||
|
||||
/* Put the STE back to what arm_smmu_init_strtab() sets */
|
||||
if (dev->iommu->require_direct)
|
||||
arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
|
||||
else
|
||||
arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
|
||||
|
||||
arm_smmu_disable_pasid(master);
|
||||
arm_smmu_remove_master(master);
|
||||
if (arm_smmu_cdtab_allocated(&master->cd_table))
|
||||
|
|
@ -3678,6 +3678,7 @@ static int arm_smmu_def_domain_type(struct device *dev)
|
|||
static const struct iommu_ops arm_smmu_ops = {
|
||||
.identity_domain = &arm_smmu_identity_domain,
|
||||
.blocked_domain = &arm_smmu_blocked_domain,
|
||||
.release_domain = &arm_smmu_blocked_domain,
|
||||
.capable = arm_smmu_capable,
|
||||
.hw_info = arm_smmu_hw_info,
|
||||
.domain_alloc_sva = arm_smmu_sva_domain_alloc,
|
||||
|
|
|
|||
|
|
@ -367,6 +367,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
|
|||
static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
|
||||
{ .compatible = "qcom,adreno" },
|
||||
{ .compatible = "qcom,adreno-gmu" },
|
||||
{ .compatible = "qcom,glymur-mdss" },
|
||||
{ .compatible = "qcom,mdp4" },
|
||||
{ .compatible = "qcom,mdss" },
|
||||
{ .compatible = "qcom,qcm2290-mdss" },
|
||||
|
|
@ -431,17 +432,19 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
|
|||
|
||||
/*
|
||||
* Some platforms support more than the Arm SMMU architected maximum of
|
||||
* 128 stream matching groups. For unknown reasons, the additional
|
||||
* groups don't exhibit the same behavior as the architected registers,
|
||||
* so limit the groups to 128 until the behavior is fixed for the other
|
||||
* groups.
|
||||
* 128 stream matching groups. The additional registers appear to have
|
||||
* the same behavior as the architected registers in the hardware.
|
||||
* However, on some firmware versions, the hypervisor does not
|
||||
* correctly trap and emulate accesses to the additional registers,
|
||||
* resulting in unexpected behavior.
|
||||
*
|
||||
* If there are more than 128 groups, use the last reliable group to
|
||||
* detect if we need to apply the bypass quirk.
|
||||
*/
|
||||
if (smmu->num_mapping_groups > 128) {
|
||||
dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n");
|
||||
smmu->num_mapping_groups = 128;
|
||||
}
|
||||
|
||||
last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
|
||||
if (smmu->num_mapping_groups > 128)
|
||||
last_s2cr = ARM_SMMU_GR0_S2CR(127);
|
||||
else
|
||||
last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
|
||||
|
||||
/*
|
||||
* With some firmware versions writes to S2CR of type FAULT are
|
||||
|
|
@ -464,6 +467,11 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
|
|||
|
||||
reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, CBAR_TYPE_S1_TRANS_S2_BYPASS);
|
||||
arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(qsmmu->bypass_cbndx), reg);
|
||||
|
||||
if (smmu->num_mapping_groups > 128) {
|
||||
dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n");
|
||||
smmu->num_mapping_groups = 128;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < smmu->num_mapping_groups; i++) {
|
||||
|
|
|
|||
|
|
@ -1165,7 +1165,8 @@ static void arm_smmu_master_install_s2crs(struct arm_smmu_master_cfg *cfg,
|
|||
}
|
||||
}
|
||||
|
||||
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
||||
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
|
||||
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
|
||||
|
|
@ -1234,7 +1235,8 @@ static int arm_smmu_attach_dev_type(struct device *dev,
|
|||
}
|
||||
|
||||
static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
return arm_smmu_attach_dev_type(dev, S2CR_TYPE_BYPASS);
|
||||
}
|
||||
|
|
@ -1249,7 +1251,8 @@ static struct iommu_domain arm_smmu_identity_domain = {
|
|||
};
|
||||
|
||||
static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
return arm_smmu_attach_dev_type(dev, S2CR_TYPE_FAULT);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -359,7 +359,8 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain)
|
|||
kfree(qcom_domain);
|
||||
}
|
||||
|
||||
static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
||||
static int qcom_iommu_attach_dev(struct iommu_domain *domain,
|
||||
struct device *dev, struct iommu_domain *old)
|
||||
{
|
||||
struct qcom_iommu_dev *qcom_iommu = dev_iommu_priv_get(dev);
|
||||
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
|
||||
|
|
@ -388,18 +389,18 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev
|
|||
}
|
||||
|
||||
static int qcom_iommu_identity_attach(struct iommu_domain *identity_domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
|
||||
struct qcom_iommu_domain *qcom_domain;
|
||||
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
|
||||
struct qcom_iommu_dev *qcom_iommu = dev_iommu_priv_get(dev);
|
||||
unsigned int i;
|
||||
|
||||
if (domain == identity_domain || !domain)
|
||||
if (old == identity_domain || !old)
|
||||
return 0;
|
||||
|
||||
qcom_domain = to_qcom_iommu_domain(domain);
|
||||
qcom_domain = to_qcom_iommu_domain(old);
|
||||
if (WARN_ON(!qcom_domain->iommu))
|
||||
return -EINVAL;
|
||||
|
||||
|
|
@ -565,14 +566,14 @@ static int qcom_iommu_of_xlate(struct device *dev,
|
|||
|
||||
qcom_iommu = platform_get_drvdata(iommu_pdev);
|
||||
|
||||
put_device(&iommu_pdev->dev);
|
||||
|
||||
/* make sure the asid specified in dt is valid, so we don't have
|
||||
* to sanity check this elsewhere:
|
||||
*/
|
||||
if (WARN_ON(asid > qcom_iommu->max_asid) ||
|
||||
WARN_ON(qcom_iommu->ctxs[asid] == NULL)) {
|
||||
put_device(&iommu_pdev->dev);
|
||||
WARN_ON(qcom_iommu->ctxs[asid] == NULL))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!dev_iommu_priv_get(dev)) {
|
||||
dev_iommu_priv_set(dev, qcom_iommu);
|
||||
|
|
@ -581,10 +582,8 @@ static int qcom_iommu_of_xlate(struct device *dev,
|
|||
* multiple different iommu devices. Multiple context
|
||||
* banks are ok, but multiple devices are not:
|
||||
*/
|
||||
if (WARN_ON(qcom_iommu != dev_iommu_priv_get(dev))) {
|
||||
put_device(&iommu_pdev->dev);
|
||||
if (WARN_ON(qcom_iommu != dev_iommu_priv_get(dev)))
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
return iommu_fwspec_add_ids(dev, &asid, 1);
|
||||
|
|
|
|||
|
|
@ -984,7 +984,8 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain)
|
|||
}
|
||||
|
||||
static int exynos_iommu_identity_attach(struct iommu_domain *identity_domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
|
||||
struct exynos_iommu_domain *domain;
|
||||
|
|
@ -1035,7 +1036,8 @@ static struct iommu_domain exynos_identity_domain = {
|
|||
};
|
||||
|
||||
static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
|
||||
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
|
||||
|
|
@ -1044,7 +1046,7 @@ static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
|
|||
unsigned long flags;
|
||||
int err;
|
||||
|
||||
err = exynos_iommu_identity_attach(&exynos_identity_domain, dev);
|
||||
err = exynos_iommu_identity_attach(&exynos_identity_domain, dev, old);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
|
@ -1429,8 +1431,6 @@ static void exynos_iommu_release_device(struct device *dev)
|
|||
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
|
||||
struct sysmmu_drvdata *data;
|
||||
|
||||
WARN_ON(exynos_iommu_identity_attach(&exynos_identity_domain, dev));
|
||||
|
||||
list_for_each_entry(data, &owner->controllers, owner_node)
|
||||
device_link_del(data->link);
|
||||
}
|
||||
|
|
@ -1446,17 +1446,14 @@ static int exynos_iommu_of_xlate(struct device *dev,
|
|||
return -ENODEV;
|
||||
|
||||
data = platform_get_drvdata(sysmmu);
|
||||
if (!data) {
|
||||
put_device(&sysmmu->dev);
|
||||
put_device(&sysmmu->dev);
|
||||
if (!data)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (!owner) {
|
||||
owner = kzalloc(sizeof(*owner), GFP_KERNEL);
|
||||
if (!owner) {
|
||||
put_device(&sysmmu->dev);
|
||||
if (!owner)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&owner->controllers);
|
||||
mutex_init(&owner->rpm_lock);
|
||||
|
|
@ -1476,6 +1473,7 @@ static int exynos_iommu_of_xlate(struct device *dev,
|
|||
|
||||
static const struct iommu_ops exynos_iommu_ops = {
|
||||
.identity_domain = &exynos_identity_domain,
|
||||
.release_domain = &exynos_identity_domain,
|
||||
.domain_alloc_paging = exynos_iommu_domain_alloc_paging,
|
||||
.device_group = generic_device_group,
|
||||
.probe_device = exynos_iommu_probe_device,
|
||||
|
|
|
|||
|
|
@ -238,7 +238,7 @@ static int update_domain_stash(struct fsl_dma_domain *dma_domain, u32 val)
|
|||
}
|
||||
|
||||
static int fsl_pamu_attach_device(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev, struct iommu_domain *old)
|
||||
{
|
||||
struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
|
||||
unsigned long flags;
|
||||
|
|
@ -298,9 +298,9 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
|
|||
* switches to what looks like BLOCKING.
|
||||
*/
|
||||
static int fsl_pamu_platform_attach(struct iommu_domain *platform_domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
|
||||
struct fsl_dma_domain *dma_domain;
|
||||
const u32 *prop;
|
||||
int len;
|
||||
|
|
@ -311,11 +311,11 @@ static int fsl_pamu_platform_attach(struct iommu_domain *platform_domain,
|
|||
* Hack to keep things working as they always have, only leaving an
|
||||
* UNMANAGED domain makes it BLOCKING.
|
||||
*/
|
||||
if (domain == platform_domain || !domain ||
|
||||
domain->type != IOMMU_DOMAIN_UNMANAGED)
|
||||
if (old == platform_domain || !old ||
|
||||
old->type != IOMMU_DOMAIN_UNMANAGED)
|
||||
return 0;
|
||||
|
||||
dma_domain = to_fsl_dma_domain(domain);
|
||||
dma_domain = to_fsl_dma_domain(old);
|
||||
|
||||
/*
|
||||
* Use LIODN of the PCI controller while detaching a
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,14 @@
CONFIG_KUNIT=y
CONFIG_GENERIC_PT=y
CONFIG_DEBUG_GENERIC_PT=y
CONFIG_IOMMU_PT=y
CONFIG_IOMMU_PT_AMDV1=y
CONFIG_IOMMU_PT_VTDSS=y
CONFIG_IOMMU_PT_X86_64=y
CONFIG_IOMMU_PT_KUNIT_TEST=y

CONFIG_IOMMUFD=y
CONFIG_DEBUG_KERNEL=y
CONFIG_FAULT_INJECTION=y
CONFIG_RUNTIME_TESTING_MENU=y
CONFIG_IOMMUFD_TEST=y
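(Editor's note, not part of the patch: this fragment is the kunit config for the new page-table code and can be handed straight to the KUnit runner. The in-tree path used below is my assumption, so adjust it to wherever the fragment actually lands:

	./tools/testing/kunit/kunit.py run --kunitconfig=drivers/iommu/generic_pt/.kunitconfig
)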
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
menuconfig GENERIC_PT
|
||||
bool "Generic Radix Page Table" if COMPILE_TEST
|
||||
help
|
||||
Generic library for building radix tree page tables.
|
||||
|
||||
Generic PT provides a set of HW page table formats and a common
|
||||
set of APIs to work with them.
|
||||
|
||||
if GENERIC_PT
|
||||
config DEBUG_GENERIC_PT
|
||||
bool "Extra debugging checks for GENERIC_PT"
|
||||
help
|
||||
Enable extra run time debugging checks for GENERIC_PT code. This
|
||||
incurs a runtime cost and should not be enabled for production
|
||||
kernels.
|
||||
|
||||
The kunit tests require this to be enabled to get full coverage.
|
||||
|
||||
config IOMMU_PT
|
||||
tristate "IOMMU Page Tables"
|
||||
select IOMMU_API
|
||||
depends on IOMMU_SUPPORT
|
||||
depends on GENERIC_PT
|
||||
help
|
||||
Generic library for building IOMMU page tables
|
||||
|
||||
IOMMU_PT provides an implementation of the page table operations
|
||||
related to struct iommu_domain using GENERIC_PT. It provides a single
|
||||
implementation of the page table operations that can be shared by
|
||||
multiple drivers.
|
||||
|
||||
if IOMMU_PT
|
||||
config IOMMU_PT_AMDV1
|
||||
tristate "IOMMU page table for 64-bit AMD IOMMU v1"
|
||||
depends on !GENERIC_ATOMIC64 # for cmpxchg64
|
||||
help
|
||||
iommu_domain implementation for the AMD v1 page table. AMDv1 is the
|
||||
"host" page table. It supports granular page sizes of almost every
|
||||
power of 2 and decodes the full 64-bit IOVA space.
|
||||
|
||||
Selected automatically by an IOMMU driver that uses this format.
|
||||
|
||||
config IOMMU_PT_VTDSS
|
||||
tristate "IOMMU page table for Intel VT-d Second Stage"
|
||||
depends on !GENERIC_ATOMIC64 # for cmpxchg64
|
||||
help
|
||||
iommu_domain implementation for the Intel VT-d's 64 bit 3/4/5
|
||||
level Second Stage page table. It is similar to the X86_64 format with
|
||||
4K/2M/1G page sizes.
|
||||
|
||||
Selected automatically by an IOMMU driver that uses this format.
|
||||
|
||||
config IOMMU_PT_X86_64
|
||||
tristate "IOMMU page table for x86 64-bit, 4/5 levels"
|
||||
depends on !GENERIC_ATOMIC64 # for cmpxchg64
|
||||
help
|
||||
iommu_domain implementation for the x86 64-bit 4/5 level page table.
|
||||
It supports 4K/2M/1G page sizes and can decode a sign-extended
|
||||
portion of the 64-bit IOVA space.
|
||||
|
||||
Selected automatically by an IOMMU driver that uses this format.
|
||||
|
||||
config IOMMU_PT_KUNIT_TEST
|
||||
tristate "IOMMU Page Table KUnit Test" if !KUNIT_ALL_TESTS
|
||||
depends on KUNIT
|
||||
depends on IOMMU_PT_AMDV1 || !IOMMU_PT_AMDV1
|
||||
depends on IOMMU_PT_X86_64 || !IOMMU_PT_X86_64
|
||||
depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS
|
||||
default KUNIT_ALL_TESTS
|
||||
help
|
||||
Enable kunit tests for GENERIC_PT and IOMMU_PT that covers all the
|
||||
enabled page table formats. The test covers most of the GENERIC_PT
|
||||
functions provided by the page table format, as well as covering the
|
||||
iommu_domain related functions.
|
||||
|
||||
endif
|
||||
endif
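# Editor's sketch, not part of the patch: the format symbols above are meant
# to be selected by IOMMU drivers rather than enabled by hand. A hypothetical
# driver entry (FOO_IOMMU is made up for illustration) would pull a format in
# roughly like this:
#
#	config FOO_IOMMU
#		tristate "Hypothetical IOMMU driver"
#		depends on IOMMU_SUPPORT
#		select GENERIC_PT
#		select IOMMU_PT
#		select IOMMU_PT_AMDV1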
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
iommu_pt_fmt-$(CONFIG_IOMMU_PT_AMDV1) += amdv1
|
||||
iommu_pt_fmt-$(CONFIG_IOMMUFD_TEST) += mock
|
||||
|
||||
iommu_pt_fmt-$(CONFIG_IOMMU_PT_VTDSS) += vtdss
|
||||
|
||||
iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86_64) += x86_64
|
||||
|
||||
IOMMU_PT_KUNIT_TEST :=
|
||||
define create_format
|
||||
obj-$(2) += iommu_$(1).o
|
||||
iommu_pt_kunit_test-y += kunit_iommu_$(1).o
|
||||
CFLAGS_kunit_iommu_$(1).o += -DGENERIC_PT_KUNIT=1
|
||||
IOMMU_PT_KUNIT_TEST := iommu_pt_kunit_test.o
|
||||
|
||||
endef
|
||||
|
||||
$(eval $(foreach fmt,$(iommu_pt_fmt-y),$(call create_format,$(fmt),y)))
|
||||
$(eval $(foreach fmt,$(iommu_pt_fmt-m),$(call create_format,$(fmt),m)))
|
||||
|
||||
# The kunit objects are constructed by compiling the main source
|
||||
# with -DGENERIC_PT_KUNIT
|
||||
$(obj)/kunit_iommu_%.o: $(src)/iommu_%.c FORCE
|
||||
$(call rule_mkdir)
|
||||
$(call if_changed_dep,cc_o_c)
|
||||
|
||||
obj-$(CONFIG_IOMMU_PT_KUNIT_TEST) += $(IOMMU_PT_KUNIT_TEST)
|
||||
|
|
@ -0,0 +1,411 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
* AMD IOMMU v1 page table
|
||||
*
|
||||
* This is described in Section "2.2.3 I/O Page Tables for Host Translations"
|
||||
* of the "AMD I/O Virtualization Technology (IOMMU) Specification"
|
||||
*
|
||||
* Note the level numbering here matches the core code, so level 0 is the same
|
||||
* as mode 1.
|
||||
*
|
||||
*/
|
||||
#ifndef __GENERIC_PT_FMT_AMDV1_H
|
||||
#define __GENERIC_PT_FMT_AMDV1_H
|
||||
|
||||
#include "defs_amdv1.h"
|
||||
#include "../pt_defs.h"
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/container_of.h>
|
||||
#include <linux/mem_encrypt.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
enum {
	PT_ITEM_WORD_SIZE = sizeof(u64),
	/*
	 * The IOMMUFD selftest uses the AMDv1 format with some alterations. It
	 * uses a 2k page size to test cases where the CPU page size is not the
	 * same.
	 */
#ifdef AMDV1_IOMMUFD_SELFTEST
	PT_MAX_VA_ADDRESS_LG2 = 56,
	PT_MAX_OUTPUT_ADDRESS_LG2 = 51,
	PT_MAX_TOP_LEVEL = 4,
	PT_GRANULE_LG2SZ = 11,
#else
	PT_MAX_VA_ADDRESS_LG2 = 64,
	PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
	PT_MAX_TOP_LEVEL = 5,
	PT_GRANULE_LG2SZ = 12,
#endif
	PT_TABLEMEM_LG2SZ = 12,

	/* The DTE only has these bits for the top physical address */
	PT_TOP_PHYS_MASK = GENMASK_ULL(51, 12),
};
|
||||
|
||||
/* PTE bits */
|
||||
enum {
|
||||
AMDV1PT_FMT_PR = BIT(0),
|
||||
AMDV1PT_FMT_D = BIT(6),
|
||||
AMDV1PT_FMT_NEXT_LEVEL = GENMASK_ULL(11, 9),
|
||||
AMDV1PT_FMT_OA = GENMASK_ULL(51, 12),
|
||||
AMDV1PT_FMT_FC = BIT_ULL(60),
|
||||
AMDV1PT_FMT_IR = BIT_ULL(61),
|
||||
AMDV1PT_FMT_IW = BIT_ULL(62),
|
||||
};
|
||||
|
||||
/*
|
||||
* gcc 13 has a bug where it thinks the output of FIELD_GET() is an enum, make
|
||||
* these defines to avoid it.
|
||||
*/
|
||||
#define AMDV1PT_FMT_NL_DEFAULT 0
|
||||
#define AMDV1PT_FMT_NL_SIZE 7
|
||||
|
||||
static inline pt_oaddr_t amdv1pt_table_pa(const struct pt_state *pts)
|
||||
{
|
||||
u64 entry = pts->entry;
|
||||
|
||||
if (pts_feature(pts, PT_FEAT_AMDV1_ENCRYPT_TABLES))
|
||||
entry = __sme_clr(entry);
|
||||
return oalog2_mul(FIELD_GET(AMDV1PT_FMT_OA, entry), PT_GRANULE_LG2SZ);
|
||||
}
|
||||
#define pt_table_pa amdv1pt_table_pa
|
||||
|
||||
/* Returns the oa for the start of the contiguous entry */
|
||||
static inline pt_oaddr_t amdv1pt_entry_oa(const struct pt_state *pts)
|
||||
{
|
||||
u64 entry = pts->entry;
|
||||
pt_oaddr_t oa;
|
||||
|
||||
if (pts_feature(pts, PT_FEAT_AMDV1_ENCRYPT_TABLES))
|
||||
entry = __sme_clr(entry);
|
||||
oa = FIELD_GET(AMDV1PT_FMT_OA, entry);
|
||||
|
||||
if (FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, entry) == AMDV1PT_FMT_NL_SIZE) {
|
||||
unsigned int sz_bits = oaffz(oa);
|
||||
|
||||
oa = oalog2_set_mod(oa, 0, sz_bits);
|
||||
} else if (PT_WARN_ON(FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, entry) !=
|
||||
AMDV1PT_FMT_NL_DEFAULT))
|
||||
return 0;
|
||||
return oalog2_mul(oa, PT_GRANULE_LG2SZ);
|
||||
}
|
||||
#define pt_entry_oa amdv1pt_entry_oa
|
||||
|
||||
static inline bool amdv1pt_can_have_leaf(const struct pt_state *pts)
|
||||
{
|
||||
/*
|
||||
* Table 15: Page Table Level Parameters
|
||||
* The top most level cannot have translation entries
|
||||
*/
|
||||
return pts->level < PT_MAX_TOP_LEVEL;
|
||||
}
|
||||
#define pt_can_have_leaf amdv1pt_can_have_leaf
|
||||
|
||||
/* Body in pt_fmt_defaults.h */
|
||||
static inline unsigned int pt_table_item_lg2sz(const struct pt_state *pts);
|
||||
|
||||
static inline unsigned int
amdv1pt_entry_num_contig_lg2(const struct pt_state *pts)
{
	u32 code;

	if (FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry) ==
	    AMDV1PT_FMT_NL_DEFAULT)
		return ilog2(1);

	PT_WARN_ON(FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry) !=
		   AMDV1PT_FMT_NL_SIZE);

	/*
	 * The contiguous size is encoded in the length of a string of 1's in
	 * the low bits of the OA. Reverse the equation:
	 *   code = log2_to_int(num_contig_lg2 + item_lg2sz -
	 *                      PT_GRANULE_LG2SZ - 1) - 1
	 * Which can be expressed as:
	 *   num_contig_lg2 = oalog2_ffz(code) + 1 -
	 *                    item_lg2sz - PT_GRANULE_LG2SZ
	 *
	 * Assume the bit layout is correct and remove the masking. Reorganize
	 * the equation to move all the arithmetic before the ffz.
	 */
	code = pts->entry >> (__bf_shf(AMDV1PT_FMT_OA) - 1 +
			      pt_table_item_lg2sz(pts) - PT_GRANULE_LG2SZ);
	return ffz_t(u32, code);
}
#define pt_entry_num_contig_lg2 amdv1pt_entry_num_contig_lg2
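/*
 * Editor's worked example, not part of the patch; numbers assume the
 * non-selftest layout with PT_GRANULE_LG2SZ == 12. Take a 2 MiB contiguous
 * entry in a 4 KiB-item level, i.e. oasz_lg2 == 21 and item_lg2sz == 12:
 *
 *   encode (see amdv1pt_install_leaf_entry below): the OA field's low bits
 *   get (1 << (21 - 12 - 1)) - 1 == 0xff, a run of eight 1's in entry bits
 *   [19:12], and NEXT_LEVEL is set to 7 (AMDV1PT_FMT_NL_SIZE).
 *   decode (the function above): the first zero above that run is entry
 *   bit 20, so the ffz of the shifted value is 9, giving
 *   num_contig_lg2 == 9, i.e. 2^9 * 4 KiB == 2 MiB.
 */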
|
||||
|
||||
static inline unsigned int amdv1pt_num_items_lg2(const struct pt_state *pts)
|
||||
{
|
||||
/*
|
||||
* Top entry covers bits [63:57] only, this is handled through
|
||||
* max_vasz_lg2.
|
||||
*/
|
||||
if (PT_WARN_ON(pts->level == 5))
|
||||
return 7;
|
||||
return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
|
||||
}
|
||||
#define pt_num_items_lg2 amdv1pt_num_items_lg2
|
||||
|
||||
static inline pt_vaddr_t amdv1pt_possible_sizes(const struct pt_state *pts)
|
||||
{
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
|
||||
|
||||
if (!amdv1pt_can_have_leaf(pts))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Table 14: Example Page Size Encodings
|
||||
* Address bits 51:32 can be used to encode page sizes greater than 4
|
||||
* Gbytes. Address bits 63:52 are zero-extended.
|
||||
*
|
||||
* 512GB Pages are not supported due to a hardware bug.
|
||||
* Otherwise every power of two size is supported.
|
||||
*/
|
||||
return GENMASK_ULL(min(51, isz_lg2 + amdv1pt_num_items_lg2(pts) - 1),
|
||||
isz_lg2) & ~SZ_512G;
|
||||
}
|
||||
#define pt_possible_sizes amdv1pt_possible_sizes
|
||||
|
||||
static inline enum pt_entry_type amdv1pt_load_entry_raw(struct pt_state *pts)
|
||||
{
|
||||
const u64 *tablep = pt_cur_table(pts, u64) + pts->index;
|
||||
unsigned int next_level;
|
||||
u64 entry;
|
||||
|
||||
pts->entry = entry = READ_ONCE(*tablep);
|
||||
if (!(entry & AMDV1PT_FMT_PR))
|
||||
return PT_ENTRY_EMPTY;
|
||||
|
||||
next_level = FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry);
|
||||
if (pts->level == 0 || next_level == AMDV1PT_FMT_NL_DEFAULT ||
|
||||
next_level == AMDV1PT_FMT_NL_SIZE)
|
||||
return PT_ENTRY_OA;
|
||||
return PT_ENTRY_TABLE;
|
||||
}
|
||||
#define pt_load_entry_raw amdv1pt_load_entry_raw
|
||||
|
||||
static inline void
|
||||
amdv1pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
|
||||
unsigned int oasz_lg2,
|
||||
const struct pt_write_attrs *attrs)
|
||||
{
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
|
||||
u64 *tablep = pt_cur_table(pts, u64) + pts->index;
|
||||
u64 entry;
|
||||
|
||||
if (!pt_check_install_leaf_args(pts, oa, oasz_lg2))
|
||||
return;
|
||||
|
||||
entry = AMDV1PT_FMT_PR |
|
||||
FIELD_PREP(AMDV1PT_FMT_OA, log2_div(oa, PT_GRANULE_LG2SZ)) |
|
||||
attrs->descriptor_bits;
|
||||
|
||||
if (oasz_lg2 == isz_lg2) {
|
||||
entry |= FIELD_PREP(AMDV1PT_FMT_NEXT_LEVEL,
|
||||
AMDV1PT_FMT_NL_DEFAULT);
|
||||
WRITE_ONCE(*tablep, entry);
|
||||
} else {
|
||||
unsigned int num_contig_lg2 = oasz_lg2 - isz_lg2;
|
||||
u64 *end = tablep + log2_to_int(num_contig_lg2);
|
||||
|
||||
entry |= FIELD_PREP(AMDV1PT_FMT_NEXT_LEVEL,
|
||||
AMDV1PT_FMT_NL_SIZE) |
|
||||
FIELD_PREP(AMDV1PT_FMT_OA,
|
||||
oalog2_to_int(oasz_lg2 - PT_GRANULE_LG2SZ -
|
||||
1) -
|
||||
1);
|
||||
|
||||
/* See amdv1pt_clear_entries() */
|
||||
if (num_contig_lg2 <= ilog2(32)) {
|
||||
for (; tablep != end; tablep++)
|
||||
WRITE_ONCE(*tablep, entry);
|
||||
} else {
|
||||
memset64(tablep, entry, log2_to_int(num_contig_lg2));
|
||||
}
|
||||
}
|
||||
pts->entry = entry;
|
||||
}
|
||||
#define pt_install_leaf_entry amdv1pt_install_leaf_entry
|
||||
|
||||
static inline bool amdv1pt_install_table(struct pt_state *pts,
|
||||
pt_oaddr_t table_pa,
|
||||
const struct pt_write_attrs *attrs)
|
||||
{
|
||||
u64 entry;
|
||||
|
||||
/*
|
||||
* IR and IW are ANDed from the table levels along with the PTE. We
|
||||
* always control permissions from the PTE, so always set IR and IW for
|
||||
* tables.
|
||||
*/
|
||||
entry = AMDV1PT_FMT_PR |
|
||||
FIELD_PREP(AMDV1PT_FMT_NEXT_LEVEL, pts->level) |
|
||||
FIELD_PREP(AMDV1PT_FMT_OA,
|
||||
log2_div(table_pa, PT_GRANULE_LG2SZ)) |
|
||||
AMDV1PT_FMT_IR | AMDV1PT_FMT_IW;
|
||||
if (pts_feature(pts, PT_FEAT_AMDV1_ENCRYPT_TABLES))
|
||||
entry = __sme_set(entry);
|
||||
return pt_table_install64(pts, entry);
|
||||
}
|
||||
#define pt_install_table amdv1pt_install_table
|
||||
|
||||
static inline void amdv1pt_attr_from_entry(const struct pt_state *pts,
|
||||
struct pt_write_attrs *attrs)
|
||||
{
|
||||
attrs->descriptor_bits =
|
||||
pts->entry & (AMDV1PT_FMT_FC | AMDV1PT_FMT_IR | AMDV1PT_FMT_IW);
|
||||
}
|
||||
#define pt_attr_from_entry amdv1pt_attr_from_entry
|
||||
|
||||
static inline void amdv1pt_clear_entries(struct pt_state *pts,
|
||||
unsigned int num_contig_lg2)
|
||||
{
|
||||
u64 *tablep = pt_cur_table(pts, u64) + pts->index;
|
||||
u64 *end = tablep + log2_to_int(num_contig_lg2);
|
||||
|
||||
/*
|
||||
* gcc generates rep stos for the io-pgtable code, and this difference
|
||||
* can show in microbenchmarks with larger contiguous page sizes.
|
||||
* rep is slower for small cases.
|
||||
*/
|
||||
if (num_contig_lg2 <= ilog2(32)) {
|
||||
for (; tablep != end; tablep++)
|
||||
WRITE_ONCE(*tablep, 0);
|
||||
} else {
|
||||
memset64(tablep, 0, log2_to_int(num_contig_lg2));
|
||||
}
|
||||
}
|
||||
#define pt_clear_entries amdv1pt_clear_entries
|
||||
|
||||
static inline bool amdv1pt_entry_is_write_dirty(const struct pt_state *pts)
|
||||
{
|
||||
unsigned int num_contig_lg2 = amdv1pt_entry_num_contig_lg2(pts);
|
||||
u64 *tablep = pt_cur_table(pts, u64) +
|
||||
log2_set_mod(pts->index, 0, num_contig_lg2);
|
||||
u64 *end = tablep + log2_to_int(num_contig_lg2);
|
||||
|
||||
for (; tablep != end; tablep++)
|
||||
if (READ_ONCE(*tablep) & AMDV1PT_FMT_D)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
#define pt_entry_is_write_dirty amdv1pt_entry_is_write_dirty
|
||||
|
||||
static inline void amdv1pt_entry_make_write_clean(struct pt_state *pts)
|
||||
{
|
||||
unsigned int num_contig_lg2 = amdv1pt_entry_num_contig_lg2(pts);
|
||||
u64 *tablep = pt_cur_table(pts, u64) +
|
||||
log2_set_mod(pts->index, 0, num_contig_lg2);
|
||||
u64 *end = tablep + log2_to_int(num_contig_lg2);
|
||||
|
||||
for (; tablep != end; tablep++)
|
||||
WRITE_ONCE(*tablep, READ_ONCE(*tablep) & ~(u64)AMDV1PT_FMT_D);
|
||||
}
|
||||
#define pt_entry_make_write_clean amdv1pt_entry_make_write_clean
|
||||
|
||||
static inline bool amdv1pt_entry_make_write_dirty(struct pt_state *pts)
|
||||
{
|
||||
u64 *tablep = pt_cur_table(pts, u64) + pts->index;
|
||||
u64 new = pts->entry | AMDV1PT_FMT_D;
|
||||
|
||||
return try_cmpxchg64(tablep, &pts->entry, new);
|
||||
}
|
||||
#define pt_entry_make_write_dirty amdv1pt_entry_make_write_dirty
|
||||
|
||||
/* --- iommu */
|
||||
#include <linux/generic_pt/iommu.h>
|
||||
#include <linux/iommu.h>
|
||||
|
||||
#define pt_iommu_table pt_iommu_amdv1
|
||||
|
||||
/* The common struct is in the per-format common struct */
|
||||
static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
|
||||
{
|
||||
return &container_of(iommu_table, struct pt_iommu_amdv1, iommu)
|
||||
->amdpt.common;
|
||||
}
|
||||
|
||||
static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
|
||||
{
|
||||
return &container_of(common, struct pt_iommu_amdv1, amdpt.common)->iommu;
|
||||
}
|
||||
|
||||
static inline int amdv1pt_iommu_set_prot(struct pt_common *common,
|
||||
struct pt_write_attrs *attrs,
|
||||
unsigned int iommu_prot)
|
||||
{
|
||||
u64 pte = 0;
|
||||
|
||||
if (pt_feature(common, PT_FEAT_AMDV1_FORCE_COHERENCE))
|
||||
pte |= AMDV1PT_FMT_FC;
|
||||
if (iommu_prot & IOMMU_READ)
|
||||
pte |= AMDV1PT_FMT_IR;
|
||||
if (iommu_prot & IOMMU_WRITE)
|
||||
pte |= AMDV1PT_FMT_IW;
|
||||
|
||||
/*
|
||||
* Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to
|
||||
* control this. For now if the tables use sme_set then so do the ptes.
|
||||
*/
|
||||
if (pt_feature(common, PT_FEAT_AMDV1_ENCRYPT_TABLES))
|
||||
pte = __sme_set(pte);
|
||||
|
||||
attrs->descriptor_bits = pte;
|
||||
return 0;
|
||||
}
|
||||
#define pt_iommu_set_prot amdv1pt_iommu_set_prot
|
||||
|
||||
static inline int amdv1pt_iommu_fmt_init(struct pt_iommu_amdv1 *iommu_table,
|
||||
const struct pt_iommu_amdv1_cfg *cfg)
|
||||
{
|
||||
struct pt_amdv1 *table = &iommu_table->amdpt;
|
||||
unsigned int max_vasz_lg2 = PT_MAX_VA_ADDRESS_LG2;
|
||||
|
||||
if (cfg->starting_level == 0 || cfg->starting_level > PT_MAX_TOP_LEVEL)
|
||||
return -EINVAL;
|
||||
|
||||
if (!pt_feature(&table->common, PT_FEAT_DYNAMIC_TOP) &&
|
||||
cfg->starting_level != PT_MAX_TOP_LEVEL)
|
||||
max_vasz_lg2 = PT_GRANULE_LG2SZ +
|
||||
(PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64))) *
|
||||
(cfg->starting_level + 1);
|
||||
|
||||
table->common.max_vasz_lg2 =
|
||||
min(max_vasz_lg2, cfg->common.hw_max_vasz_lg2);
|
||||
table->common.max_oasz_lg2 =
|
||||
min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
|
||||
pt_top_set_level(&table->common, cfg->starting_level);
|
||||
return 0;
|
||||
}
|
||||
#define pt_iommu_fmt_init amdv1pt_iommu_fmt_init
|
||||
|
||||
#ifndef PT_FMT_VARIANT
|
||||
static inline void
|
||||
amdv1pt_iommu_fmt_hw_info(struct pt_iommu_amdv1 *table,
|
||||
const struct pt_range *top_range,
|
||||
struct pt_iommu_amdv1_hw_info *info)
|
||||
{
|
||||
info->host_pt_root = virt_to_phys(top_range->top_table);
|
||||
PT_WARN_ON(info->host_pt_root & ~PT_TOP_PHYS_MASK);
|
||||
info->mode = top_range->top_level + 1;
|
||||
}
|
||||
#define pt_iommu_fmt_hw_info amdv1pt_iommu_fmt_hw_info
|
||||
#endif
|
||||
|
||||
#if defined(GENERIC_PT_KUNIT)
|
||||
static const struct pt_iommu_amdv1_cfg amdv1_kunit_fmt_cfgs[] = {
|
||||
/* Matches what io_pgtable does */
|
||||
[0] = { .starting_level = 2 },
|
||||
};
|
||||
#define kunit_fmt_cfgs amdv1_kunit_fmt_cfgs
|
||||
enum { KUNIT_FMT_FEATURES = 0 };
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
*/
|
||||
#ifndef __GENERIC_PT_FMT_DEFS_AMDV1_H
|
||||
#define __GENERIC_PT_FMT_DEFS_AMDV1_H
|
||||
|
||||
#include <linux/generic_pt/common.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
typedef u64 pt_vaddr_t;
|
||||
typedef u64 pt_oaddr_t;
|
||||
|
||||
struct amdv1pt_write_attrs {
|
||||
u64 descriptor_bits;
|
||||
gfp_t gfp;
|
||||
};
|
||||
#define pt_write_attrs amdv1pt_write_attrs
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
*/
|
||||
#ifndef __GENERIC_PT_FMT_DEFS_VTDSS_H
|
||||
#define __GENERIC_PT_FMT_DEFS_VTDSS_H
|
||||
|
||||
#include <linux/generic_pt/common.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
typedef u64 pt_vaddr_t;
|
||||
typedef u64 pt_oaddr_t;
|
||||
|
||||
struct vtdss_pt_write_attrs {
|
||||
u64 descriptor_bits;
|
||||
gfp_t gfp;
|
||||
};
|
||||
#define pt_write_attrs vtdss_pt_write_attrs
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
*/
|
||||
#ifndef __GENERIC_PT_FMT_DEFS_X86_64_H
|
||||
#define __GENERIC_PT_FMT_DEFS_X86_64_H
|
||||
|
||||
#include <linux/generic_pt/common.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
typedef u64 pt_vaddr_t;
|
||||
typedef u64 pt_oaddr_t;
|
||||
|
||||
struct x86_64_pt_write_attrs {
|
||||
u64 descriptor_bits;
|
||||
gfp_t gfp;
|
||||
};
|
||||
#define pt_write_attrs x86_64_pt_write_attrs
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#define PT_FMT amdv1
|
||||
#define PT_SUPPORTED_FEATURES \
|
||||
(BIT(PT_FEAT_FULL_VA) | BIT(PT_FEAT_DYNAMIC_TOP) | \
|
||||
BIT(PT_FEAT_FLUSH_RANGE) | BIT(PT_FEAT_FLUSH_RANGE_NO_GAPS) | \
|
||||
BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) | \
|
||||
BIT(PT_FEAT_AMDV1_FORCE_COHERENCE))
|
||||
#define PT_FORCE_ENABLED_FEATURES \
|
||||
(BIT(PT_FEAT_DYNAMIC_TOP) | BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) | \
|
||||
BIT(PT_FEAT_AMDV1_FORCE_COHERENCE))
|
||||
|
||||
#include "iommu_template.h"
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#define AMDV1_IOMMUFD_SELFTEST 1
|
||||
#define PT_FMT amdv1
|
||||
#define PT_FMT_VARIANT mock
|
||||
#define PT_SUPPORTED_FEATURES 0
|
||||
|
||||
#include "iommu_template.h"
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
* Template to build the iommu module and kunit from the format and
|
||||
* implementation headers.
|
||||
*
|
||||
* The format should have:
|
||||
* #define PT_FMT <name>
|
||||
* #define PT_SUPPORTED_FEATURES (BIT(PT_FEAT_xx) | BIT(PT_FEAT_yy))
|
||||
* And optionally:
|
||||
* #define PT_FORCE_ENABLED_FEATURES ..
|
||||
* #define PT_FMT_VARIANT <suffix>
|
||||
*/
|
||||
#include <linux/args.h>
|
||||
#include <linux/stringify.h>
|
||||
|
||||
#ifdef PT_FMT_VARIANT
|
||||
#define PTPFX_RAW \
|
||||
CONCATENATE(CONCATENATE(PT_FMT, _), PT_FMT_VARIANT)
|
||||
#else
|
||||
#define PTPFX_RAW PT_FMT
|
||||
#endif
|
||||
|
||||
#define PTPFX CONCATENATE(PTPFX_RAW, _)
|
||||
|
||||
#define _PT_FMT_H PT_FMT.h
|
||||
#define PT_FMT_H __stringify(_PT_FMT_H)
|
||||
|
||||
#define _PT_DEFS_H CONCATENATE(defs_, _PT_FMT_H)
|
||||
#define PT_DEFS_H __stringify(_PT_DEFS_H)
|
||||
|
||||
#include <linux/generic_pt/common.h>
|
||||
#include PT_DEFS_H
|
||||
#include "../pt_defs.h"
|
||||
#include PT_FMT_H
|
||||
#include "../pt_common.h"
|
||||
|
||||
#ifndef GENERIC_PT_KUNIT
|
||||
#include "../iommu_pt.h"
|
||||
#else
|
||||
/*
|
||||
* The makefile will compile the .c file twice, once with GENERIC_PT_KUNIT set
|
||||
* which means we are building the kunit modle.
|
||||
*/
|
||||
#include "../kunit_generic_pt.h"
|
||||
#include "../kunit_iommu_pt.h"
|
||||
#endif
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#define PT_FMT vtdss
|
||||
#define PT_SUPPORTED_FEATURES \
|
||||
(BIT(PT_FEAT_FLUSH_RANGE) | BIT(PT_FEAT_VTDSS_FORCE_COHERENCE) | \
|
||||
BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE) | BIT(PT_FEAT_DMA_INCOHERENT))
|
||||
|
||||
#include "iommu_template.h"
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#define PT_FMT x86_64
|
||||
#define PT_SUPPORTED_FEATURES \
|
||||
(BIT(PT_FEAT_SIGN_EXTEND) | BIT(PT_FEAT_FLUSH_RANGE) | \
|
||||
BIT(PT_FEAT_FLUSH_RANGE_NO_GAPS) | \
|
||||
BIT(PT_FEAT_X86_64_AMD_ENCRYPT_TABLES) | BIT(PT_FEAT_DMA_INCOHERENT))
|
||||
|
||||
#include "iommu_template.h"
|
||||
|
|
@ -0,0 +1,285 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
* Intel VT-d Second Stange 5/4 level page table
|
||||
*
|
||||
* This is described in
|
||||
* Section "3.7 Second-Stage Translation"
|
||||
* Section "9.8 Second-Stage Paging Entries"
|
||||
*
|
||||
* Of the "Intel Virtualization Technology for Directed I/O Architecture
|
||||
* Specification".
|
||||
*
|
||||
* The named levels in the spec map to the pts->level as:
|
||||
* Table/SS-PTE - 0
|
||||
* Directory/SS-PDE - 1
|
||||
* Directory Ptr/SS-PDPTE - 2
|
||||
* PML4/SS-PML4E - 3
|
||||
* PML5/SS-PML5E - 4
|
||||
*/
|
||||
#ifndef __GENERIC_PT_FMT_VTDSS_H
|
||||
#define __GENERIC_PT_FMT_VTDSS_H
|
||||
|
||||
#include "defs_vtdss.h"
|
||||
#include "../pt_defs.h"
|
||||
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/container_of.h>
|
||||
#include <linux/log2.h>
|
||||
|
||||
enum {
|
||||
PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
|
||||
PT_MAX_VA_ADDRESS_LG2 = 57,
|
||||
PT_ITEM_WORD_SIZE = sizeof(u64),
|
||||
PT_MAX_TOP_LEVEL = 4,
|
||||
PT_GRANULE_LG2SZ = 12,
|
||||
PT_TABLEMEM_LG2SZ = 12,
|
||||
|
||||
/* SSPTPTR is 4k aligned and limited by HAW */
|
||||
PT_TOP_PHYS_MASK = GENMASK_ULL(63, 12),
|
||||
};
|
||||
|
||||
/* Shared descriptor bits */
|
||||
enum {
|
||||
VTDSS_FMT_R = BIT(0),
|
||||
VTDSS_FMT_W = BIT(1),
|
||||
VTDSS_FMT_A = BIT(8),
|
||||
VTDSS_FMT_D = BIT(9),
|
||||
VTDSS_FMT_SNP = BIT(11),
|
||||
VTDSS_FMT_OA = GENMASK_ULL(51, 12),
|
||||
};
|
||||
|
||||
/* PDPTE/PDE */
|
||||
enum {
|
||||
VTDSS_FMT_PS = BIT(7),
|
||||
};
|
||||
|
||||
#define common_to_vtdss_pt(common_ptr) \
|
||||
container_of_const(common_ptr, struct pt_vtdss, common)
|
||||
#define to_vtdss_pt(pts) common_to_vtdss_pt((pts)->range->common)
|
||||
|
||||
static inline pt_oaddr_t vtdss_pt_table_pa(const struct pt_state *pts)
|
||||
{
|
||||
return oalog2_mul(FIELD_GET(VTDSS_FMT_OA, pts->entry),
|
||||
PT_TABLEMEM_LG2SZ);
|
||||
}
|
||||
#define pt_table_pa vtdss_pt_table_pa
|
||||
|
||||
static inline pt_oaddr_t vtdss_pt_entry_oa(const struct pt_state *pts)
|
||||
{
|
||||
return oalog2_mul(FIELD_GET(VTDSS_FMT_OA, pts->entry),
|
||||
PT_GRANULE_LG2SZ);
|
||||
}
|
||||
#define pt_entry_oa vtdss_pt_entry_oa
|
||||
|
||||
static inline bool vtdss_pt_can_have_leaf(const struct pt_state *pts)
|
||||
{
|
||||
return pts->level <= 2;
|
||||
}
|
||||
#define pt_can_have_leaf vtdss_pt_can_have_leaf
|
||||
|
||||
static inline unsigned int vtdss_pt_num_items_lg2(const struct pt_state *pts)
|
||||
{
|
||||
return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
|
||||
}
|
||||
#define pt_num_items_lg2 vtdss_pt_num_items_lg2
|
||||
|
||||
static inline enum pt_entry_type vtdss_pt_load_entry_raw(struct pt_state *pts)
|
||||
{
|
||||
const u64 *tablep = pt_cur_table(pts, u64);
|
||||
u64 entry;
|
||||
|
||||
pts->entry = entry = READ_ONCE(tablep[pts->index]);
|
||||
if (!entry)
|
||||
return PT_ENTRY_EMPTY;
|
||||
if (pts->level == 0 ||
|
||||
(vtdss_pt_can_have_leaf(pts) && (pts->entry & VTDSS_FMT_PS)))
|
||||
return PT_ENTRY_OA;
|
||||
return PT_ENTRY_TABLE;
|
||||
}
|
||||
#define pt_load_entry_raw vtdss_pt_load_entry_raw
|
||||
|
||||
static inline void
|
||||
vtdss_pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
|
||||
unsigned int oasz_lg2,
|
||||
const struct pt_write_attrs *attrs)
|
||||
{
|
||||
u64 *tablep = pt_cur_table(pts, u64);
|
||||
u64 entry;
|
||||
|
||||
if (!pt_check_install_leaf_args(pts, oa, oasz_lg2))
|
||||
return;
|
||||
|
||||
entry = FIELD_PREP(VTDSS_FMT_OA, log2_div(oa, PT_GRANULE_LG2SZ)) |
|
||||
attrs->descriptor_bits;
|
||||
if (pts->level != 0)
|
||||
entry |= VTDSS_FMT_PS;
|
||||
|
||||
WRITE_ONCE(tablep[pts->index], entry);
|
||||
pts->entry = entry;
|
||||
}
|
||||
#define pt_install_leaf_entry vtdss_pt_install_leaf_entry
|
||||
|
||||
static inline bool vtdss_pt_install_table(struct pt_state *pts,
|
||||
pt_oaddr_t table_pa,
|
||||
const struct pt_write_attrs *attrs)
|
||||
{
|
||||
u64 entry;
|
||||
|
||||
entry = VTDSS_FMT_R | VTDSS_FMT_W |
|
||||
FIELD_PREP(VTDSS_FMT_OA, log2_div(table_pa, PT_GRANULE_LG2SZ));
|
||||
return pt_table_install64(pts, entry);
|
||||
}
|
||||
#define pt_install_table vtdss_pt_install_table
|
||||
|
||||
static inline void vtdss_pt_attr_from_entry(const struct pt_state *pts,
|
||||
struct pt_write_attrs *attrs)
|
||||
{
|
||||
attrs->descriptor_bits = pts->entry &
|
||||
(VTDSS_FMT_R | VTDSS_FMT_W | VTDSS_FMT_SNP);
|
||||
}
|
||||
#define pt_attr_from_entry vtdss_pt_attr_from_entry
|
||||
|
||||
static inline bool vtdss_pt_entry_is_write_dirty(const struct pt_state *pts)
|
||||
{
|
||||
u64 *tablep = pt_cur_table(pts, u64) + pts->index;
|
||||
|
||||
return READ_ONCE(*tablep) & VTDSS_FMT_D;
|
||||
}
|
||||
#define pt_entry_is_write_dirty vtdss_pt_entry_is_write_dirty
|
||||
|
||||
static inline void vtdss_pt_entry_make_write_clean(struct pt_state *pts)
|
||||
{
|
||||
u64 *tablep = pt_cur_table(pts, u64) + pts->index;
|
||||
|
||||
WRITE_ONCE(*tablep, READ_ONCE(*tablep) & ~(u64)VTDSS_FMT_D);
|
||||
}
|
||||
#define pt_entry_make_write_clean vtdss_pt_entry_make_write_clean
|
||||
|
||||
static inline bool vtdss_pt_entry_make_write_dirty(struct pt_state *pts)
|
||||
{
|
||||
u64 *tablep = pt_cur_table(pts, u64) + pts->index;
|
||||
u64 new = pts->entry | VTDSS_FMT_D;
|
||||
|
||||
return try_cmpxchg64(tablep, &pts->entry, new);
|
||||
}
|
||||
#define pt_entry_make_write_dirty vtdss_pt_entry_make_write_dirty
|
||||
|
||||
static inline unsigned int vtdss_pt_max_sw_bit(struct pt_common *common)
|
||||
{
|
||||
return 10;
|
||||
}
|
||||
#define pt_max_sw_bit vtdss_pt_max_sw_bit
|
||||
|
||||
static inline u64 vtdss_pt_sw_bit(unsigned int bitnr)
|
||||
{
|
||||
if (__builtin_constant_p(bitnr) && bitnr > 10)
|
||||
BUILD_BUG();
|
||||
|
||||
/* Bits marked Ignored in the specification */
|
||||
switch (bitnr) {
|
||||
case 0:
|
||||
return BIT(10);
|
||||
case 1 ... 9:
|
||||
return BIT_ULL((bitnr - 1) + 52);
|
||||
case 10:
|
||||
return BIT_ULL(63);
|
||||
/* Some bits in 9-3 are available in some entries */
|
||||
default:
|
||||
PT_WARN_ON(true);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#define pt_sw_bit vtdss_pt_sw_bit
|
||||
|
||||
/* --- iommu */
|
||||
#include <linux/generic_pt/iommu.h>
|
||||
#include <linux/iommu.h>
|
||||
|
||||
#define pt_iommu_table pt_iommu_vtdss
|
||||
|
||||
/* The common struct is in the per-format common struct */
|
||||
static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
|
||||
{
|
||||
return &container_of(iommu_table, struct pt_iommu_table, iommu)
|
||||
->vtdss_pt.common;
|
||||
}
|
||||
|
||||
static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
|
||||
{
|
||||
return &container_of(common, struct pt_iommu_table, vtdss_pt.common)
|
||||
->iommu;
|
||||
}
|
||||
|
||||
static inline int vtdss_pt_iommu_set_prot(struct pt_common *common,
|
||||
struct pt_write_attrs *attrs,
|
||||
unsigned int iommu_prot)
|
||||
{
|
||||
u64 pte = 0;
|
||||
|
||||
/*
|
||||
* VTDSS does not have a present bit, so we tell if any entry is present
|
||||
* by checking for R or W.
|
||||
*/
|
||||
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
|
||||
return -EINVAL;
|
||||
|
||||
if (iommu_prot & IOMMU_READ)
|
||||
pte |= VTDSS_FMT_R;
|
||||
if (iommu_prot & IOMMU_WRITE)
|
||||
pte |= VTDSS_FMT_W;
|
||||
if (pt_feature(common, PT_FEAT_VTDSS_FORCE_COHERENCE))
|
||||
pte |= VTDSS_FMT_SNP;
|
||||
|
||||
if (pt_feature(common, PT_FEAT_VTDSS_FORCE_WRITEABLE) &&
|
||||
!(iommu_prot & IOMMU_WRITE)) {
|
||||
pr_err_ratelimited(
|
||||
"Read-only mapping is disallowed on the domain which serves as the parent in a nested configuration, due to HW errata (ERRATA_772415_SPR17)\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
attrs->descriptor_bits = pte;
|
||||
return 0;
|
||||
}
|
||||
#define pt_iommu_set_prot vtdss_pt_iommu_set_prot
|
||||
|
||||
static inline int vtdss_pt_iommu_fmt_init(struct pt_iommu_vtdss *iommu_table,
|
||||
const struct pt_iommu_vtdss_cfg *cfg)
|
||||
{
|
||||
struct pt_vtdss *table = &iommu_table->vtdss_pt;
|
||||
|
||||
if (cfg->top_level > 4 || cfg->top_level < 2)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
pt_top_set_level(&table->common, cfg->top_level);
|
||||
return 0;
|
||||
}
|
||||
#define pt_iommu_fmt_init vtdss_pt_iommu_fmt_init
|
||||
|
||||
static inline void
|
||||
vtdss_pt_iommu_fmt_hw_info(struct pt_iommu_vtdss *table,
|
||||
const struct pt_range *top_range,
|
||||
struct pt_iommu_vtdss_hw_info *info)
|
||||
{
|
||||
info->ssptptr = virt_to_phys(top_range->top_table);
|
||||
PT_WARN_ON(info->ssptptr & ~PT_TOP_PHYS_MASK);
|
||||
/*
|
||||
* top_level = 2 = 3 level table aw=1
|
||||
* top_level = 3 = 4 level table aw=2
|
||||
* top_level = 4 = 5 level table aw=3
|
||||
*/
|
||||
info->aw = top_range->top_level - 1;
|
||||
}
|
||||
#define pt_iommu_fmt_hw_info vtdss_pt_iommu_fmt_hw_info
|
||||
|
||||
#if defined(GENERIC_PT_KUNIT)
|
||||
static const struct pt_iommu_vtdss_cfg vtdss_kunit_fmt_cfgs[] = {
|
||||
[0] = { .common.hw_max_vasz_lg2 = 39, .top_level = 2},
|
||||
[1] = { .common.hw_max_vasz_lg2 = 48, .top_level = 3},
|
||||
[2] = { .common.hw_max_vasz_lg2 = 57, .top_level = 4},
|
||||
};
|
||||
#define kunit_fmt_cfgs vtdss_kunit_fmt_cfgs
|
||||
enum { KUNIT_FMT_FEATURES = BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE) };
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,279 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
* x86 page table. Supports the 4 and 5 level variations.
|
||||
*
|
||||
* The 4 and 5 level version is described in:
|
||||
* Section "4.4 4-Level Paging and 5-Level Paging" of the Intel Software
|
||||
* Developer's Manual Volume 3
|
||||
*
|
||||
* Section "9.7 First-Stage Paging Entries" of the "Intel Virtualization
|
||||
* Technology for Directed I/O Architecture Specification"
|
||||
*
|
||||
* Section "2.2.6 I/O Page Tables for Guest Translations" of the "AMD I/O
|
||||
* Virtualization Technology (IOMMU) Specification"
|
||||
*
|
||||
* It is used by x86 CPUs, AMD and VT-d IOMMU HW.
|
||||
*
|
||||
* Note the 3 level format is very similar and almost implemented here. The
|
||||
* reserved/ignored layout is different and there are functional bit
|
||||
* differences.
|
||||
*
|
||||
* This format uses PT_FEAT_SIGN_EXTEND to have a upper/non-canonical/lower
|
||||
* split. PT_FEAT_SIGN_EXTEND is optional as AMD IOMMU sometimes uses non-sign
|
||||
* extended addressing with this page table format.
|
||||
*
|
||||
* The named levels in the spec map to the pts->level as:
|
||||
* Table/PTE - 0
|
||||
* Directory/PDE - 1
|
||||
* Directory Ptr/PDPTE - 2
|
||||
* PML4/PML4E - 3
|
||||
* PML5/PML5E - 4
|
||||
*/
|
||||
#ifndef __GENERIC_PT_FMT_X86_64_H
|
||||
#define __GENERIC_PT_FMT_X86_64_H
|
||||
|
||||
#include "defs_x86_64.h"
|
||||
#include "../pt_defs.h"
|
||||
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/container_of.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/mem_encrypt.h>
|
||||
|
||||
enum {
|
||||
PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
|
||||
PT_MAX_VA_ADDRESS_LG2 = 57,
|
||||
PT_ITEM_WORD_SIZE = sizeof(u64),
|
||||
PT_MAX_TOP_LEVEL = 4,
|
||||
PT_GRANULE_LG2SZ = 12,
|
||||
PT_TABLEMEM_LG2SZ = 12,
|
||||
|
||||
/*
|
||||
* For AMD the GCR3 Base only has these bits. For VT-d FSPTPTR is 4k
|
||||
* aligned and is limited by the architected HAW
|
||||
*/
|
||||
PT_TOP_PHYS_MASK = GENMASK_ULL(51, 12),
|
||||
};
|
||||
|
||||
/* Shared descriptor bits */
|
||||
enum {
|
||||
X86_64_FMT_P = BIT(0),
|
||||
X86_64_FMT_RW = BIT(1),
|
||||
X86_64_FMT_U = BIT(2),
|
||||
X86_64_FMT_A = BIT(5),
|
||||
X86_64_FMT_D = BIT(6),
|
||||
X86_64_FMT_OA = GENMASK_ULL(51, 12),
|
||||
X86_64_FMT_XD = BIT_ULL(63),
|
||||
};
|
||||
|
||||
/* PDPTE/PDE */
|
||||
enum {
|
||||
X86_64_FMT_PS = BIT(7),
|
||||
};
|
||||
|
||||
static inline pt_oaddr_t x86_64_pt_table_pa(const struct pt_state *pts)
|
||||
{
|
||||
u64 entry = pts->entry;
|
||||
|
||||
if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
|
||||
entry = __sme_clr(entry);
|
||||
return oalog2_mul(FIELD_GET(X86_64_FMT_OA, entry),
|
||||
PT_TABLEMEM_LG2SZ);
|
||||
}
|
||||
#define pt_table_pa x86_64_pt_table_pa
|
||||
|
||||
static inline pt_oaddr_t x86_64_pt_entry_oa(const struct pt_state *pts)
|
||||
{
|
||||
u64 entry = pts->entry;
|
||||
|
||||
if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
|
||||
entry = __sme_clr(entry);
|
||||
return oalog2_mul(FIELD_GET(X86_64_FMT_OA, entry),
|
||||
PT_GRANULE_LG2SZ);
|
||||
}
|
||||
#define pt_entry_oa x86_64_pt_entry_oa
|
||||
|
||||
static inline bool x86_64_pt_can_have_leaf(const struct pt_state *pts)
|
||||
{
|
||||
return pts->level <= 2;
|
||||
}
|
||||
#define pt_can_have_leaf x86_64_pt_can_have_leaf
|
||||
|
||||
static inline unsigned int x86_64_pt_num_items_lg2(const struct pt_state *pts)
|
||||
{
|
||||
return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
|
||||
}
|
||||
#define pt_num_items_lg2 x86_64_pt_num_items_lg2
|
||||
|
||||
static inline enum pt_entry_type x86_64_pt_load_entry_raw(struct pt_state *pts)
|
||||
{
|
||||
const u64 *tablep = pt_cur_table(pts, u64);
|
||||
u64 entry;
|
||||
|
||||
pts->entry = entry = READ_ONCE(tablep[pts->index]);
|
||||
if (!(entry & X86_64_FMT_P))
|
||||
return PT_ENTRY_EMPTY;
|
||||
if (pts->level == 0 ||
|
||||
(x86_64_pt_can_have_leaf(pts) && (entry & X86_64_FMT_PS)))
|
||||
return PT_ENTRY_OA;
|
||||
return PT_ENTRY_TABLE;
|
||||
}
|
||||
#define pt_load_entry_raw x86_64_pt_load_entry_raw
|
||||
|
||||
static inline void
|
||||
x86_64_pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
|
||||
unsigned int oasz_lg2,
|
||||
const struct pt_write_attrs *attrs)
|
||||
{
|
||||
u64 *tablep = pt_cur_table(pts, u64);
|
||||
u64 entry;
|
||||
|
||||
if (!pt_check_install_leaf_args(pts, oa, oasz_lg2))
|
||||
return;
|
||||
|
||||
entry = X86_64_FMT_P |
|
||||
FIELD_PREP(X86_64_FMT_OA, log2_div(oa, PT_GRANULE_LG2SZ)) |
|
||||
attrs->descriptor_bits;
|
||||
if (pts->level != 0)
|
||||
entry |= X86_64_FMT_PS;
|
||||
|
||||
WRITE_ONCE(tablep[pts->index], entry);
|
||||
pts->entry = entry;
|
||||
}
|
||||
#define pt_install_leaf_entry x86_64_pt_install_leaf_entry
|
||||
|
||||
static inline bool x86_64_pt_install_table(struct pt_state *pts,
|
||||
pt_oaddr_t table_pa,
|
||||
const struct pt_write_attrs *attrs)
|
||||
{
|
||||
u64 entry;
|
||||
|
||||
entry = X86_64_FMT_P | X86_64_FMT_RW | X86_64_FMT_U | X86_64_FMT_A |
|
||||
FIELD_PREP(X86_64_FMT_OA, log2_div(table_pa, PT_GRANULE_LG2SZ));
|
||||
if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
|
||||
entry = __sme_set(entry);
|
||||
return pt_table_install64(pts, entry);
|
||||
}
|
||||
#define pt_install_table x86_64_pt_install_table
|
||||
|
||||
static inline void x86_64_pt_attr_from_entry(const struct pt_state *pts,
|
||||
struct pt_write_attrs *attrs)
|
||||
{
|
||||
attrs->descriptor_bits = pts->entry &
|
||||
(X86_64_FMT_RW | X86_64_FMT_U | X86_64_FMT_A |
|
||||
X86_64_FMT_D | X86_64_FMT_XD);
|
||||
}
|
||||
#define pt_attr_from_entry x86_64_pt_attr_from_entry
|
||||
|
||||
static inline unsigned int x86_64_pt_max_sw_bit(struct pt_common *common)
|
||||
{
|
||||
return 12;
|
||||
}
|
||||
#define pt_max_sw_bit x86_64_pt_max_sw_bit
|
||||
|
||||
static inline u64 x86_64_pt_sw_bit(unsigned int bitnr)
|
||||
{
|
||||
if (__builtin_constant_p(bitnr) && bitnr > 12)
|
||||
BUILD_BUG();
|
||||
|
||||
/* Bits marked Ignored/AVL in the specification */
|
||||
switch (bitnr) {
|
||||
case 0:
|
||||
return BIT(9);
|
||||
case 1:
|
||||
return BIT(11);
|
||||
case 2 ... 12:
|
||||
return BIT_ULL((bitnr - 2) + 52);
|
||||
/* Some bits in 8,6,4,3 are available in some entries */
|
||||
default:
|
||||
PT_WARN_ON(true);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#define pt_sw_bit x86_64_pt_sw_bit
|
||||
|
||||
/* --- iommu */
|
||||
#include <linux/generic_pt/iommu.h>
|
||||
#include <linux/iommu.h>
|
||||
|
||||
#define pt_iommu_table pt_iommu_x86_64
|
||||
|
||||
/* The common struct is in the per-format common struct */
|
||||
static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
|
||||
{
|
||||
return &container_of(iommu_table, struct pt_iommu_table, iommu)
|
||||
->x86_64_pt.common;
|
||||
}
|
||||
|
||||
static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
|
||||
{
|
||||
return &container_of(common, struct pt_iommu_table, x86_64_pt.common)
|
||||
->iommu;
|
||||
}
|
||||
|
||||
static inline int x86_64_pt_iommu_set_prot(struct pt_common *common,
|
||||
struct pt_write_attrs *attrs,
|
||||
unsigned int iommu_prot)
|
||||
{
|
||||
u64 pte;
|
||||
|
||||
pte = X86_64_FMT_U | X86_64_FMT_A;
|
||||
if (iommu_prot & IOMMU_WRITE)
|
||||
pte |= X86_64_FMT_RW | X86_64_FMT_D;
|
||||
|
||||
/*
|
||||
* Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to
|
||||
* control this. For now if the tables use sme_set then so do the ptes.
|
||||
*/
|
||||
if (pt_feature(common, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
|
||||
pte = __sme_set(pte);
|
||||
|
||||
attrs->descriptor_bits = pte;
|
||||
return 0;
|
||||
}
|
||||
#define pt_iommu_set_prot x86_64_pt_iommu_set_prot
|
||||
|
||||
static inline int
|
||||
x86_64_pt_iommu_fmt_init(struct pt_iommu_x86_64 *iommu_table,
|
||||
const struct pt_iommu_x86_64_cfg *cfg)
|
||||
{
|
||||
struct pt_x86_64 *table = &iommu_table->x86_64_pt;
|
||||
|
||||
if (cfg->top_level < 3 || cfg->top_level > 4)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
pt_top_set_level(&table->common, cfg->top_level);
|
||||
|
||||
table->common.max_oasz_lg2 =
|
||||
min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
|
||||
return 0;
|
||||
}
|
||||
#define pt_iommu_fmt_init x86_64_pt_iommu_fmt_init
|
||||
|
||||
static inline void
|
||||
x86_64_pt_iommu_fmt_hw_info(struct pt_iommu_x86_64 *table,
|
||||
const struct pt_range *top_range,
|
||||
struct pt_iommu_x86_64_hw_info *info)
|
||||
{
|
||||
info->gcr3_pt = virt_to_phys(top_range->top_table);
|
||||
PT_WARN_ON(info->gcr3_pt & ~PT_TOP_PHYS_MASK);
|
||||
info->levels = top_range->top_level + 1;
|
||||
}
|
||||
#define pt_iommu_fmt_hw_info x86_64_pt_iommu_fmt_hw_info
|
||||
|
||||
#if defined(GENERIC_PT_KUNIT)
|
||||
static const struct pt_iommu_x86_64_cfg x86_64_kunit_fmt_cfgs[] = {
|
||||
[0] = { .common.features = BIT(PT_FEAT_SIGN_EXTEND),
|
||||
.common.hw_max_vasz_lg2 = 48, .top_level = 3 },
|
||||
[1] = { .common.features = BIT(PT_FEAT_SIGN_EXTEND),
|
||||
.common.hw_max_vasz_lg2 = 57, .top_level = 4 },
|
||||
/* AMD IOMMU PASID 0 formats with no SIGN_EXTEND */
|
||||
[2] = { .common.hw_max_vasz_lg2 = 47, .top_level = 3 },
|
||||
[3] = { .common.hw_max_vasz_lg2 = 56, .top_level = 4},
|
||||
};
|
||||
#define kunit_fmt_cfgs x86_64_kunit_fmt_cfgs
|
||||
enum { KUNIT_FMT_FEATURES = BIT(PT_FEAT_SIGN_EXTEND)};
|
||||
#endif
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,823 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
* Test the format API directly.
|
||||
*
|
||||
*/
|
||||
#include "kunit_iommu.h"
|
||||
#include "pt_iter.h"
|
||||
|
||||
static void do_map(struct kunit *test, pt_vaddr_t va, pt_oaddr_t pa,
|
||||
pt_vaddr_t len)
|
||||
{
|
||||
struct kunit_iommu_priv *priv = test->priv;
|
||||
int ret;
|
||||
|
||||
KUNIT_ASSERT_EQ(test, len, (size_t)len);
|
||||
|
||||
ret = iommu_map(&priv->domain, va, pa, len, IOMMU_READ | IOMMU_WRITE,
|
||||
GFP_KERNEL);
|
||||
KUNIT_ASSERT_NO_ERRNO_FN(test, "map_pages", ret);
|
||||
}
|
||||
|
||||
#define KUNIT_ASSERT_PT_LOAD(test, pts, entry) \
|
||||
({ \
|
||||
pt_load_entry(pts); \
|
||||
KUNIT_ASSERT_EQ(test, (pts)->type, entry); \
|
||||
})
|
||||
|
||||
struct check_levels_arg {
|
||||
struct kunit *test;
|
||||
void *fn_arg;
|
||||
void (*fn)(struct kunit *test, struct pt_state *pts, void *arg);
|
||||
};
|
||||
|
||||
static int __check_all_levels(struct pt_range *range, void *arg,
|
||||
unsigned int level, struct pt_table_p *table)
|
||||
{
|
||||
struct pt_state pts = pt_init(range, level, table);
|
||||
struct check_levels_arg *chk = arg;
|
||||
struct kunit *test = chk->test;
|
||||
int ret;
|
||||
|
||||
_pt_iter_first(&pts);
|
||||
|
||||
|
||||
/*
|
||||
* If we were able to use the full VA space this should always be the
|
||||
* last index in each table.
|
||||
*/
|
||||
if (!(IS_32BIT && range->max_vasz_lg2 > 32)) {
|
||||
if (pt_feature(range->common, PT_FEAT_SIGN_EXTEND) &&
|
||||
pts.level == pts.range->top_level)
|
||||
KUNIT_ASSERT_EQ(test, pts.index,
|
||||
log2_to_int(range->max_vasz_lg2 - 1 -
|
||||
pt_table_item_lg2sz(&pts)) -
|
||||
1);
|
||||
else
|
||||
KUNIT_ASSERT_EQ(test, pts.index,
|
||||
log2_to_int(pt_table_oa_lg2sz(&pts) -
|
||||
pt_table_item_lg2sz(&pts)) -
|
||||
1);
|
||||
}
|
||||
|
||||
if (pt_can_have_table(&pts)) {
|
||||
pt_load_single_entry(&pts);
|
||||
KUNIT_ASSERT_EQ(test, pts.type, PT_ENTRY_TABLE);
|
||||
ret = pt_descend(&pts, arg, __check_all_levels);
|
||||
KUNIT_ASSERT_EQ(test, ret, 0);
|
||||
|
||||
/* Index 0 is used by the test */
|
||||
if (IS_32BIT && !pts.index)
|
||||
return 0;
|
||||
KUNIT_ASSERT_NE(chk->test, pts.index, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* A format should not create a table with only one entry, at least this
|
||||
* test approach won't work.
|
||||
*/
|
||||
KUNIT_ASSERT_GT(chk->test, pts.end_index, 1);
|
||||
|
||||
/*
|
||||
* For increase top we end up using index 0 for the original top's tree,
|
||||
* so use index 1 for testing instead.
|
||||
*/
|
||||
pts.index = 0;
|
||||
pt_index_to_va(&pts);
|
||||
pt_load_single_entry(&pts);
|
||||
if (pts.type == PT_ENTRY_TABLE && pts.end_index > 2) {
|
||||
pts.index = 1;
|
||||
pt_index_to_va(&pts);
|
||||
}
|
||||
(*chk->fn)(chk->test, &pts, chk->fn_arg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Call fn for each level in the table with a pts setup to index 0 in a table
|
||||
* for that level. This allows writing tests that run on every level.
|
||||
* The test can use every index in the table except the last one.
|
||||
*/
|
||||
static void check_all_levels(struct kunit *test,
|
||||
void (*fn)(struct kunit *test,
|
||||
struct pt_state *pts, void *arg),
|
||||
void *fn_arg)
|
||||
{
|
||||
struct kunit_iommu_priv *priv = test->priv;
|
||||
struct pt_range range = pt_top_range(priv->common);
|
||||
struct check_levels_arg chk = {
|
||||
.test = test,
|
||||
.fn = fn,
|
||||
.fn_arg = fn_arg,
|
||||
};
|
||||
int ret;
|
||||
|
||||
if (pt_feature(priv->common, PT_FEAT_DYNAMIC_TOP) &&
|
||||
priv->common->max_vasz_lg2 > range.max_vasz_lg2)
|
||||
range.last_va = fvalog2_set_mod_max(range.va,
|
||||
priv->common->max_vasz_lg2);
|
||||
|
||||
/*
|
||||
* Map a page at the highest VA, this will populate all the levels so we
|
||||
* can then iterate over them. Index 0 will be used for testing.
|
||||
*/
|
||||
if (IS_32BIT && range.max_vasz_lg2 > 32)
|
||||
range.last_va = (u32)range.last_va;
|
||||
range.va = range.last_va - (priv->smallest_pgsz - 1);
|
||||
do_map(test, range.va, 0, priv->smallest_pgsz);
|
||||
|
||||
range = pt_make_range(priv->common, range.va, range.last_va);
|
||||
ret = pt_walk_range(&range, __check_all_levels, &chk);
|
||||
KUNIT_ASSERT_EQ(test, ret, 0);
|
||||
}
|
||||
|
||||
static void test_init(struct kunit *test)
|
||||
{
|
||||
struct kunit_iommu_priv *priv = test->priv;
|
||||
|
||||
/* Fixture does the setup */
|
||||
KUNIT_ASSERT_NE(test, priv->info.pgsize_bitmap, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Basic check that the log2_* functions are working, especially at the integer
|
||||
* limits.
|
||||
*/
|
||||
static void test_bitops(struct kunit *test)
|
||||
{
|
||||
int i;
|
||||
|
||||
KUNIT_ASSERT_EQ(test, fls_t(u32, 0), 0);
|
||||
KUNIT_ASSERT_EQ(test, fls_t(u32, 1), 1);
|
||||
KUNIT_ASSERT_EQ(test, fls_t(u32, BIT(2)), 3);
|
||||
KUNIT_ASSERT_EQ(test, fls_t(u32, U32_MAX), 32);
|
||||
|
||||
KUNIT_ASSERT_EQ(test, fls_t(u64, 0), 0);
|
||||
KUNIT_ASSERT_EQ(test, fls_t(u64, 1), 1);
|
||||
KUNIT_ASSERT_EQ(test, fls_t(u64, BIT(2)), 3);
|
||||
KUNIT_ASSERT_EQ(test, fls_t(u64, U64_MAX), 64);
|
||||
|
||||
KUNIT_ASSERT_EQ(test, ffs_t(u32, 1), 0);
|
||||
KUNIT_ASSERT_EQ(test, ffs_t(u32, BIT(2)), 2);
|
||||
KUNIT_ASSERT_EQ(test, ffs_t(u32, BIT(31)), 31);
|
||||
|
||||
KUNIT_ASSERT_EQ(test, ffs_t(u64, 1), 0);
|
||||
KUNIT_ASSERT_EQ(test, ffs_t(u64, BIT(2)), 2);
|
||||
KUNIT_ASSERT_EQ(test, ffs_t(u64, BIT_ULL(63)), 63);
|
||||
|
||||
for (i = 0; i != 31; i++)
|
||||
KUNIT_ASSERT_EQ(test, ffz_t(u64, BIT_ULL(i) - 1), i);
|
||||
|
||||
for (i = 0; i != 63; i++)
|
||||
KUNIT_ASSERT_EQ(test, ffz_t(u64, BIT_ULL(i) - 1), i);
|
||||
|
||||
for (i = 0; i != 32; i++) {
|
||||
u64 val = get_random_u64();
|
||||
|
||||
KUNIT_ASSERT_EQ(test, log2_mod_t(u32, val, ffs_t(u32, val)), 0);
|
||||
KUNIT_ASSERT_EQ(test, log2_mod_t(u64, val, ffs_t(u64, val)), 0);
|
||||
|
||||
KUNIT_ASSERT_EQ(test, log2_mod_t(u32, val, ffz_t(u32, val)),
|
||||
log2_to_max_int_t(u32, ffz_t(u32, val)));
|
||||
KUNIT_ASSERT_EQ(test, log2_mod_t(u64, val, ffz_t(u64, val)),
|
||||
log2_to_max_int_t(u64, ffz_t(u64, val)));
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int ref_best_pgsize(pt_vaddr_t pgsz_bitmap, pt_vaddr_t va,
|
||||
pt_vaddr_t last_va, pt_oaddr_t oa)
|
||||
{
|
||||
pt_vaddr_t pgsz_lg2;
|
||||
|
||||
/* Brute force the constraints described in pt_compute_best_pgsize() */
|
||||
for (pgsz_lg2 = PT_VADDR_MAX_LG2 - 1; pgsz_lg2 != 0; pgsz_lg2--) {
|
||||
if ((pgsz_bitmap & log2_to_int(pgsz_lg2)) &&
|
||||
log2_mod(va, pgsz_lg2) == 0 &&
|
||||
oalog2_mod(oa, pgsz_lg2) == 0 &&
|
||||
va + log2_to_int(pgsz_lg2) - 1 <= last_va &&
|
||||
log2_div_eq(va, va + log2_to_int(pgsz_lg2) - 1, pgsz_lg2) &&
|
||||
oalog2_div_eq(oa, oa + log2_to_int(pgsz_lg2) - 1, pgsz_lg2))
|
||||
return pgsz_lg2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that the bit logic in pt_compute_best_pgsize() works. */
|
||||
static void test_best_pgsize(struct kunit *test)
|
||||
{
|
||||
unsigned int a_lg2;
|
||||
unsigned int b_lg2;
|
||||
unsigned int c_lg2;
|
||||
|
||||
/* Try random prefixes with every suffix combination */
|
||||
for (a_lg2 = 1; a_lg2 != 10; a_lg2++) {
|
||||
for (b_lg2 = 1; b_lg2 != 10; b_lg2++) {
|
||||
for (c_lg2 = 1; c_lg2 != 10; c_lg2++) {
|
||||
pt_vaddr_t pgsz_bitmap = get_random_u64();
|
||||
pt_vaddr_t va = get_random_u64() << a_lg2;
|
||||
pt_oaddr_t oa = get_random_u64() << b_lg2;
|
||||
pt_vaddr_t last_va = log2_set_mod_max(
|
||||
get_random_u64(), c_lg2);
|
||||
|
||||
if (va > last_va)
|
||||
swap(va, last_va);
|
||||
KUNIT_ASSERT_EQ(
|
||||
test,
|
||||
pt_compute_best_pgsize(pgsz_bitmap, va,
|
||||
last_va, oa),
|
||||
ref_best_pgsize(pgsz_bitmap, va,
|
||||
last_va, oa));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* 0 prefix, every suffix */
|
||||
for (c_lg2 = 1; c_lg2 != PT_VADDR_MAX_LG2 - 1; c_lg2++) {
|
||||
pt_vaddr_t pgsz_bitmap = get_random_u64();
|
||||
pt_vaddr_t va = 0;
|
||||
pt_oaddr_t oa = 0;
|
||||
pt_vaddr_t last_va = log2_set_mod_max(0, c_lg2);
|
||||
|
||||
KUNIT_ASSERT_EQ(test,
|
||||
pt_compute_best_pgsize(pgsz_bitmap, va, last_va,
|
||||
oa),
|
||||
ref_best_pgsize(pgsz_bitmap, va, last_va, oa));
|
||||
}
|
||||
|
||||
/* 1's prefix, every suffix */
|
||||
for (a_lg2 = 1; a_lg2 != 10; a_lg2++) {
|
||||
for (b_lg2 = 1; b_lg2 != 10; b_lg2++) {
|
||||
for (c_lg2 = 1; c_lg2 != 10; c_lg2++) {
|
||||
pt_vaddr_t pgsz_bitmap = get_random_u64();
|
||||
pt_vaddr_t va = PT_VADDR_MAX << a_lg2;
|
||||
pt_oaddr_t oa = PT_VADDR_MAX << b_lg2;
|
||||
pt_vaddr_t last_va = PT_VADDR_MAX;
|
||||
|
||||
KUNIT_ASSERT_EQ(
|
||||
test,
|
||||
pt_compute_best_pgsize(pgsz_bitmap, va,
|
||||
last_va, oa),
|
||||
ref_best_pgsize(pgsz_bitmap, va,
|
||||
last_va, oa));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* pgsize_bitmap is always 0 */
|
||||
for (a_lg2 = 1; a_lg2 != 10; a_lg2++) {
|
||||
for (b_lg2 = 1; b_lg2 != 10; b_lg2++) {
|
||||
for (c_lg2 = 1; c_lg2 != 10; c_lg2++) {
|
||||
pt_vaddr_t pgsz_bitmap = 0;
|
||||
pt_vaddr_t va = get_random_u64() << a_lg2;
|
||||
pt_oaddr_t oa = get_random_u64() << b_lg2;
|
||||
pt_vaddr_t last_va = log2_set_mod_max(
|
||||
get_random_u64(), c_lg2);
|
||||
|
||||
if (va > last_va)
|
||||
swap(va, last_va);
|
||||
KUNIT_ASSERT_EQ(
|
||||
test,
|
||||
pt_compute_best_pgsize(pgsz_bitmap, va,
|
||||
last_va, oa),
|
||||
0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (sizeof(pt_vaddr_t) <= 4)
|
||||
return;
|
||||
|
||||
/* over 32 bit page sizes */
|
||||
for (a_lg2 = 32; a_lg2 != 42; a_lg2++) {
|
||||
for (b_lg2 = 32; b_lg2 != 42; b_lg2++) {
|
||||
for (c_lg2 = 32; c_lg2 != 42; c_lg2++) {
|
||||
pt_vaddr_t pgsz_bitmap = get_random_u64();
|
||||
pt_vaddr_t va = get_random_u64() << a_lg2;
|
||||
pt_oaddr_t oa = get_random_u64() << b_lg2;
|
||||
pt_vaddr_t last_va = log2_set_mod_max(
|
||||
get_random_u64(), c_lg2);
|
||||
|
||||
if (va > last_va)
|
||||
swap(va, last_va);
|
||||
KUNIT_ASSERT_EQ(
|
||||
test,
|
||||
pt_compute_best_pgsize(pgsz_bitmap, va,
|
||||
last_va, oa),
|
||||
ref_best_pgsize(pgsz_bitmap, va,
|
||||
last_va, oa));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that pt_install_table() and pt_table_pa() match
|
||||
*/
|
||||
static void test_lvl_table_ptr(struct kunit *test, struct pt_state *pts,
|
||||
void *arg)
|
||||
{
|
||||
struct kunit_iommu_priv *priv = test->priv;
|
||||
pt_oaddr_t paddr =
|
||||
log2_set_mod(priv->test_oa, 0, priv->smallest_pgsz_lg2);
|
||||
struct pt_write_attrs attrs = {};
|
||||
|
||||
if (!pt_can_have_table(pts))
|
||||
return;
|
||||
|
||||
KUNIT_ASSERT_NO_ERRNO_FN(test, "pt_iommu_set_prot",
|
||||
pt_iommu_set_prot(pts->range->common, &attrs,
|
||||
IOMMU_READ));
|
||||
|
||||
pt_load_single_entry(pts);
|
||||
KUNIT_ASSERT_PT_LOAD(test, pts, PT_ENTRY_EMPTY);
|
||||
|
||||
KUNIT_ASSERT_TRUE(test, pt_install_table(pts, paddr, &attrs));
|
||||
|
||||
/* A second install should pass because install updates pts->entry. */
|
||||
KUNIT_ASSERT_EQ(test, pt_install_table(pts, paddr, &attrs), true);
|
||||
|
||||
KUNIT_ASSERT_PT_LOAD(test, pts, PT_ENTRY_TABLE);
|
||||
KUNIT_ASSERT_EQ(test, pt_table_pa(pts), paddr);
|
||||
|
||||
pt_clear_entries(pts, ilog2(1));
|
||||
KUNIT_ASSERT_PT_LOAD(test, pts, PT_ENTRY_EMPTY);
|
||||
}
|
||||
|
||||
static void test_table_ptr(struct kunit *test)
|
||||
{
|
||||
check_all_levels(test, test_lvl_table_ptr, NULL);
|
||||
}
|
||||
|
||||
struct lvl_radix_arg {
|
||||
pt_vaddr_t vbits;
|
||||
};
|
||||
|
||||
/*
|
||||
* Check pt_table_oa_lg2sz() and pt_table_item_lg2sz() they need to decode a
|
||||
* continuous list of VA across all the levels that covers the entire advertised
|
||||
* VA space.
|
||||
*/
|
||||
static void test_lvl_radix(struct kunit *test, struct pt_state *pts, void *arg)
|
||||
{
|
||||
unsigned int table_lg2sz = pt_table_oa_lg2sz(pts);
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
|
||||
struct lvl_radix_arg *radix = arg;
|
||||
|
||||
/* Every bit below us is decoded */
|
||||
KUNIT_ASSERT_EQ(test, log2_set_mod_max(0, isz_lg2), radix->vbits);
|
||||
|
||||
/* We are not decoding bits someone else is */
|
||||
KUNIT_ASSERT_EQ(test, log2_div(radix->vbits, isz_lg2), 0);
|
||||
|
||||
/* Can't decode past the pt_vaddr_t size */
|
||||
KUNIT_ASSERT_LE(test, table_lg2sz, PT_VADDR_MAX_LG2);
|
||||
KUNIT_ASSERT_EQ(test, fvalog2_div(table_lg2sz, PT_MAX_VA_ADDRESS_LG2),
|
||||
0);
|
||||
|
||||
radix->vbits = fvalog2_set_mod_max(0, table_lg2sz);
|
||||
}
|
||||
|
||||
static void test_max_va(struct kunit *test)
|
||||
{
|
||||
struct kunit_iommu_priv *priv = test->priv;
|
||||
struct pt_range range = pt_top_range(priv->common);
|
||||
|
||||
KUNIT_ASSERT_GE(test, priv->common->max_vasz_lg2, range.max_vasz_lg2);
|
||||
}
|
||||
|
||||
static void test_table_radix(struct kunit *test)
|
||||
{
|
||||
struct kunit_iommu_priv *priv = test->priv;
|
||||
struct lvl_radix_arg radix = { .vbits = priv->smallest_pgsz - 1 };
|
||||
struct pt_range range;
|
||||
|
||||
check_all_levels(test, test_lvl_radix, &radix);
|
||||
|
||||
range = pt_top_range(priv->common);
|
||||
if (range.max_vasz_lg2 == PT_VADDR_MAX_LG2) {
|
||||
KUNIT_ASSERT_EQ(test, radix.vbits, PT_VADDR_MAX);
|
||||
} else {
|
||||
if (!IS_32BIT)
|
||||
KUNIT_ASSERT_EQ(test,
|
||||
log2_set_mod_max(0, range.max_vasz_lg2),
|
||||
radix.vbits);
|
||||
KUNIT_ASSERT_EQ(test, log2_div(radix.vbits, range.max_vasz_lg2),
|
||||
0);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int safe_pt_num_items_lg2(const struct pt_state *pts)
|
||||
{
|
||||
struct pt_range top_range = pt_top_range(pts->range->common);
|
||||
struct pt_state top_pts = pt_init_top(&top_range);
|
||||
|
||||
/*
|
||||
* Avoid calling pt_num_items_lg2() on the top, instead we can derive
|
||||
* the size of the top table from the top range.
|
||||
*/
|
||||
if (pts->level == top_range.top_level)
|
||||
return ilog2(pt_range_to_end_index(&top_pts));
|
||||
return pt_num_items_lg2(pts);
|
||||
}
|
||||
|
||||
static void test_lvl_possible_sizes(struct kunit *test, struct pt_state *pts,
|
||||
void *arg)
|
||||
{
|
||||
unsigned int num_items_lg2 = safe_pt_num_items_lg2(pts);
|
||||
pt_vaddr_t pgsize_bitmap = pt_possible_sizes(pts);
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
|
||||
|
||||
if (!pt_can_have_leaf(pts)) {
|
||||
KUNIT_ASSERT_EQ(test, pgsize_bitmap, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
/* No bits for sizes that would be outside this table */
|
||||
KUNIT_ASSERT_EQ(test, log2_mod(pgsize_bitmap, isz_lg2), 0);
|
||||
KUNIT_ASSERT_EQ(
|
||||
test, fvalog2_div(pgsize_bitmap, num_items_lg2 + isz_lg2), 0);
|
||||
|
||||
/*
|
||||
* Non contiguous must be supported. AMDv1 has a HW bug where it does
|
||||
* not support it on one of the levels.
|
||||
*/
|
||||
if ((u64)pgsize_bitmap != 0xff0000000000ULL ||
|
||||
strcmp(__stringify(PTPFX_RAW), "amdv1") != 0)
|
||||
KUNIT_ASSERT_TRUE(test, pgsize_bitmap & log2_to_int(isz_lg2));
|
||||
else
|
||||
KUNIT_ASSERT_NE(test, pgsize_bitmap, 0);
|
||||
|
||||
/* A contiguous entry should not span the whole table */
|
||||
if (num_items_lg2 + isz_lg2 != PT_VADDR_MAX_LG2)
|
||||
KUNIT_ASSERT_FALSE(
|
||||
test,
|
||||
pgsize_bitmap & log2_to_int(num_items_lg2 + isz_lg2));
|
||||
}
|
||||
|
||||
static void test_entry_possible_sizes(struct kunit *test)
|
||||
{
|
||||
check_all_levels(test, test_lvl_possible_sizes, NULL);
|
||||
}
|
||||
|
||||
static void sweep_all_pgsizes(struct kunit *test, struct pt_state *pts,
|
||||
struct pt_write_attrs *attrs,
|
||||
pt_oaddr_t test_oaddr)
|
||||
{
|
||||
pt_vaddr_t pgsize_bitmap = pt_possible_sizes(pts);
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
|
||||
unsigned int len_lg2;
|
||||
|
||||
if (pts->index != 0)
|
||||
return;
|
||||
|
||||
for (len_lg2 = 0; len_lg2 < PT_VADDR_MAX_LG2 - 1; len_lg2++) {
|
||||
struct pt_state sub_pts = *pts;
|
||||
pt_oaddr_t oaddr;
|
||||
|
||||
if (!(pgsize_bitmap & log2_to_int(len_lg2)))
|
||||
continue;
|
||||
|
||||
oaddr = log2_set_mod(test_oaddr, 0, len_lg2);
|
||||
pt_install_leaf_entry(pts, oaddr, len_lg2, attrs);
|
||||
/* Verify that every contiguous item translates correctly */
|
||||
for (sub_pts.index = 0;
|
||||
sub_pts.index != log2_to_int(len_lg2 - isz_lg2);
|
||||
sub_pts.index++) {
|
||||
KUNIT_ASSERT_PT_LOAD(test, &sub_pts, PT_ENTRY_OA);
|
||||
KUNIT_ASSERT_EQ(test, pt_item_oa(&sub_pts),
|
||||
oaddr + sub_pts.index *
|
||||
oalog2_mul(1, isz_lg2));
|
||||
KUNIT_ASSERT_EQ(test, pt_entry_oa(&sub_pts), oaddr);
|
||||
KUNIT_ASSERT_EQ(test, pt_entry_num_contig_lg2(&sub_pts),
|
||||
len_lg2 - isz_lg2);
|
||||
}
|
||||
|
||||
pt_clear_entries(pts, len_lg2 - isz_lg2);
|
||||
KUNIT_ASSERT_PT_LOAD(test, pts, PT_ENTRY_EMPTY);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that pt_install_leaf_entry() and pt_entry_oa() match.
|
||||
* Check that pt_clear_entries() works.
|
||||
*/
|
||||
static void test_lvl_entry_oa(struct kunit *test, struct pt_state *pts,
|
||||
void *arg)
|
||||
{
|
||||
unsigned int max_oa_lg2 = pts->range->common->max_oasz_lg2;
|
||||
struct kunit_iommu_priv *priv = test->priv;
|
||||
struct pt_write_attrs attrs = {};
|
||||
|
||||
if (!pt_can_have_leaf(pts))
|
||||
return;
|
||||
|
||||
KUNIT_ASSERT_NO_ERRNO_FN(test, "pt_iommu_set_prot",
|
||||
pt_iommu_set_prot(pts->range->common, &attrs,
|
||||
IOMMU_READ));
|
||||
|
||||
sweep_all_pgsizes(test, pts, &attrs, priv->test_oa);
|
||||
|
||||
/* Check that the table can store the boundary OAs */
|
||||
sweep_all_pgsizes(test, pts, &attrs, 0);
|
||||
if (max_oa_lg2 == PT_OADDR_MAX_LG2)
|
||||
sweep_all_pgsizes(test, pts, &attrs, PT_OADDR_MAX);
|
||||
else
|
||||
sweep_all_pgsizes(test, pts, &attrs,
|
||||
oalog2_to_max_int(max_oa_lg2));
|
||||
}
|
||||
|
||||
static void test_entry_oa(struct kunit *test)
|
||||
{
|
||||
check_all_levels(test, test_lvl_entry_oa, NULL);
|
||||
}
|
||||
|
||||
/* Test pt_attr_from_entry() */
|
||||
static void test_lvl_attr_from_entry(struct kunit *test, struct pt_state *pts,
|
||||
void *arg)
|
||||
{
|
||||
pt_vaddr_t pgsize_bitmap = pt_possible_sizes(pts);
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
|
||||
struct kunit_iommu_priv *priv = test->priv;
|
||||
unsigned int len_lg2;
|
||||
unsigned int prot;
|
||||
|
||||
if (!pt_can_have_leaf(pts))
|
||||
return;
|
||||
|
||||
for (len_lg2 = 0; len_lg2 < PT_VADDR_MAX_LG2; len_lg2++) {
|
||||
if (!(pgsize_bitmap & log2_to_int(len_lg2)))
|
||||
continue;
|
||||
for (prot = 0; prot <= (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |
|
||||
IOMMU_NOEXEC | IOMMU_MMIO);
|
||||
prot++) {
|
||||
pt_oaddr_t oaddr;
|
||||
struct pt_write_attrs attrs = {};
|
||||
u64 good_entry;
|
||||
|
||||
/*
|
||||
* If the format doesn't support this combination of
|
||||
* prot bits skip it
|
||||
*/
|
||||
if (pt_iommu_set_prot(pts->range->common, &attrs,
|
||||
prot)) {
|
||||
/* But RW has to be supported */
|
||||
KUNIT_ASSERT_NE(test, prot,
|
||||
IOMMU_READ | IOMMU_WRITE);
|
||||
continue;
|
||||
}
|
||||
|
||||
oaddr = log2_set_mod(priv->test_oa, 0, len_lg2);
|
||||
pt_install_leaf_entry(pts, oaddr, len_lg2, &attrs);
|
||||
KUNIT_ASSERT_PT_LOAD(test, pts, PT_ENTRY_OA);
|
||||
|
||||
good_entry = pts->entry;
|
||||
|
||||
memset(&attrs, 0, sizeof(attrs));
|
||||
pt_attr_from_entry(pts, &attrs);
|
||||
|
||||
pt_clear_entries(pts, len_lg2 - isz_lg2);
|
||||
KUNIT_ASSERT_PT_LOAD(test, pts, PT_ENTRY_EMPTY);
|
||||
|
||||
pt_install_leaf_entry(pts, oaddr, len_lg2, &attrs);
|
||||
KUNIT_ASSERT_PT_LOAD(test, pts, PT_ENTRY_OA);
|
||||
|
||||
/*
|
||||
* The descriptor produced by pt_attr_from_entry()
|
||||
* produce an identical entry value when re-written
|
||||
*/
|
||||
KUNIT_ASSERT_EQ(test, good_entry, pts->entry);
|
||||
|
||||
pt_clear_entries(pts, len_lg2 - isz_lg2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void test_attr_from_entry(struct kunit *test)
|
||||
{
|
||||
check_all_levels(test, test_lvl_attr_from_entry, NULL);
|
||||
}
|
||||
|
||||
static void test_lvl_dirty(struct kunit *test, struct pt_state *pts, void *arg)
|
||||
{
|
||||
pt_vaddr_t pgsize_bitmap = pt_possible_sizes(pts);
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
|
||||
struct kunit_iommu_priv *priv = test->priv;
|
||||
unsigned int start_idx = pts->index;
|
||||
struct pt_write_attrs attrs = {};
|
||||
unsigned int len_lg2;
|
||||
|
||||
if (!pt_can_have_leaf(pts))
|
||||
return;
|
||||
|
||||
KUNIT_ASSERT_NO_ERRNO_FN(test, "pt_iommu_set_prot",
|
||||
pt_iommu_set_prot(pts->range->common, &attrs,
|
||||
IOMMU_READ | IOMMU_WRITE));
|
||||
|
||||
for (len_lg2 = 0; len_lg2 < PT_VADDR_MAX_LG2; len_lg2++) {
|
||||
pt_oaddr_t oaddr;
|
||||
unsigned int i;
|
||||
|
||||
if (!(pgsize_bitmap & log2_to_int(len_lg2)))
|
||||
continue;
|
||||
|
||||
oaddr = log2_set_mod(priv->test_oa, 0, len_lg2);
|
||||
pt_install_leaf_entry(pts, oaddr, len_lg2, &attrs);
|
||||
KUNIT_ASSERT_PT_LOAD(test, pts, PT_ENTRY_OA);
|
||||
|
||||
pt_load_entry(pts);
|
||||
pt_entry_make_write_clean(pts);
|
||||
pt_load_entry(pts);
|
||||
KUNIT_ASSERT_FALSE(test, pt_entry_is_write_dirty(pts));
|
||||
|
||||
for (i = 0; i != log2_to_int(len_lg2 - isz_lg2); i++) {
|
||||
/* dirty every contiguous entry */
|
||||
pts->index = start_idx + i;
|
||||
pt_load_entry(pts);
|
||||
KUNIT_ASSERT_TRUE(test, pt_entry_make_write_dirty(pts));
|
||||
pts->index = start_idx;
|
||||
pt_load_entry(pts);
|
||||
KUNIT_ASSERT_TRUE(test, pt_entry_is_write_dirty(pts));
|
||||
|
||||
pt_entry_make_write_clean(pts);
|
||||
pt_load_entry(pts);
|
||||
KUNIT_ASSERT_FALSE(test, pt_entry_is_write_dirty(pts));
|
||||
}
|
||||
|
||||
pt_clear_entries(pts, len_lg2 - isz_lg2);
|
||||
}
|
||||
}
|
||||
|
||||
static __maybe_unused void test_dirty(struct kunit *test)
|
||||
{
|
||||
struct kunit_iommu_priv *priv = test->priv;
|
||||
|
||||
if (!pt_dirty_supported(priv->common))
|
||||
kunit_skip(test,
|
||||
"Page table features do not support dirty tracking");
|
||||
|
||||
check_all_levels(test, test_lvl_dirty, NULL);
|
||||
}
|
||||
|
||||
static void test_lvl_sw_bit_leaf(struct kunit *test, struct pt_state *pts,
				 void *arg)
{
	struct kunit_iommu_priv *priv = test->priv;
	pt_vaddr_t pgsize_bitmap = pt_possible_sizes(pts);
	unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
	struct pt_write_attrs attrs = {};
	unsigned int len_lg2;

	if (!pt_can_have_leaf(pts))
		return;
	if (pts->index != 0)
		return;

	KUNIT_ASSERT_NO_ERRNO_FN(test, "pt_iommu_set_prot",
				 pt_iommu_set_prot(pts->range->common, &attrs,
						   IOMMU_READ));

	for (len_lg2 = 0; len_lg2 < PT_VADDR_MAX_LG2 - 1; len_lg2++) {
		pt_oaddr_t paddr = log2_set_mod(priv->test_oa, 0, len_lg2);
		struct pt_write_attrs new_attrs = {};
		unsigned int bitnr;

		if (!(pgsize_bitmap & log2_to_int(len_lg2)))
			continue;

		pt_install_leaf_entry(pts, paddr, len_lg2, &attrs);

		for (bitnr = 0; bitnr <= pt_max_sw_bit(pts->range->common);
		     bitnr++)
			KUNIT_ASSERT_FALSE(test,
					   pt_test_sw_bit_acquire(pts, bitnr));

		for (bitnr = 0; bitnr <= pt_max_sw_bit(pts->range->common);
		     bitnr++) {
			KUNIT_ASSERT_FALSE(test,
					   pt_test_sw_bit_acquire(pts, bitnr));
			pt_set_sw_bit_release(pts, bitnr);
			KUNIT_ASSERT_TRUE(test,
					  pt_test_sw_bit_acquire(pts, bitnr));
		}

		for (bitnr = 0; bitnr <= pt_max_sw_bit(pts->range->common);
		     bitnr++)
			KUNIT_ASSERT_TRUE(test,
					  pt_test_sw_bit_acquire(pts, bitnr));

		KUNIT_ASSERT_EQ(test, pt_item_oa(pts), paddr);

		/* SW bits didn't leak into the attrs */
		pt_attr_from_entry(pts, &new_attrs);
		KUNIT_ASSERT_MEMEQ(test, &new_attrs, &attrs, sizeof(attrs));

		pt_clear_entries(pts, len_lg2 - isz_lg2);
		KUNIT_ASSERT_PT_LOAD(test, pts, PT_ENTRY_EMPTY);
	}
}

static __maybe_unused void test_sw_bit_leaf(struct kunit *test)
{
	check_all_levels(test, test_lvl_sw_bit_leaf, NULL);
}

static void test_lvl_sw_bit_table(struct kunit *test, struct pt_state *pts,
				  void *arg)
{
	struct kunit_iommu_priv *priv = test->priv;
	struct pt_write_attrs attrs = {};
	pt_oaddr_t paddr =
		log2_set_mod(priv->test_oa, 0, priv->smallest_pgsz_lg2);
	unsigned int bitnr;

	if (!pt_can_have_table(pts))
		return;
	if (pts->index != 0)
		return;

	KUNIT_ASSERT_NO_ERRNO_FN(test, "pt_iommu_set_prot",
				 pt_iommu_set_prot(pts->range->common, &attrs,
						   IOMMU_READ));

	KUNIT_ASSERT_TRUE(test, pt_install_table(pts, paddr, &attrs));

	for (bitnr = 0; bitnr <= pt_max_sw_bit(pts->range->common); bitnr++)
		KUNIT_ASSERT_FALSE(test, pt_test_sw_bit_acquire(pts, bitnr));

	for (bitnr = 0; bitnr <= pt_max_sw_bit(pts->range->common); bitnr++) {
		KUNIT_ASSERT_FALSE(test, pt_test_sw_bit_acquire(pts, bitnr));
		pt_set_sw_bit_release(pts, bitnr);
		KUNIT_ASSERT_TRUE(test, pt_test_sw_bit_acquire(pts, bitnr));
	}

	for (bitnr = 0; bitnr <= pt_max_sw_bit(pts->range->common); bitnr++)
		KUNIT_ASSERT_TRUE(test, pt_test_sw_bit_acquire(pts, bitnr));

	KUNIT_ASSERT_EQ(test, pt_table_pa(pts), paddr);

	pt_clear_entries(pts, ilog2(1));
	KUNIT_ASSERT_PT_LOAD(test, pts, PT_ENTRY_EMPTY);
}

static __maybe_unused void test_sw_bit_table(struct kunit *test)
{
	check_all_levels(test, test_lvl_sw_bit_table, NULL);
}

static struct kunit_case generic_pt_test_cases[] = {
	KUNIT_CASE_FMT(test_init),
	KUNIT_CASE_FMT(test_bitops),
	KUNIT_CASE_FMT(test_best_pgsize),
	KUNIT_CASE_FMT(test_table_ptr),
	KUNIT_CASE_FMT(test_max_va),
	KUNIT_CASE_FMT(test_table_radix),
	KUNIT_CASE_FMT(test_entry_possible_sizes),
	KUNIT_CASE_FMT(test_entry_oa),
	KUNIT_CASE_FMT(test_attr_from_entry),
#ifdef pt_entry_is_write_dirty
	KUNIT_CASE_FMT(test_dirty),
#endif
#ifdef pt_sw_bit
	KUNIT_CASE_FMT(test_sw_bit_leaf),
	KUNIT_CASE_FMT(test_sw_bit_table),
#endif
	{},
};

static int pt_kunit_generic_pt_init(struct kunit *test)
{
	struct kunit_iommu_priv *priv;
	int ret;

	priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;
	ret = pt_kunit_priv_init(test, priv);
	if (ret) {
		kunit_kfree(test, priv);
		return ret;
	}
	test->priv = priv;
	return 0;
}

static void pt_kunit_generic_pt_exit(struct kunit *test)
{
	struct kunit_iommu_priv *priv = test->priv;

	if (!test->priv)
		return;

	pt_iommu_deinit(priv->iommu);
	kunit_kfree(test, test->priv);
}

static struct kunit_suite NS(generic_pt_suite) = {
	.name = __stringify(NS(fmt_test)),
	.init = pt_kunit_generic_pt_init,
	.exit = pt_kunit_generic_pt_exit,
	.test_cases = generic_pt_test_cases,
};
kunit_test_suites(&NS(generic_pt_suite));

@@ -0,0 +1,184 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __GENERIC_PT_KUNIT_IOMMU_H
#define __GENERIC_PT_KUNIT_IOMMU_H

#define GENERIC_PT_KUNIT 1
#include <kunit/device.h>
#include <kunit/test.h>
#include "../iommu-pages.h"
#include "pt_iter.h"

#define pt_iommu_table_cfg CONCATENATE(pt_iommu_table, _cfg)
#define pt_iommu_init CONCATENATE(CONCATENATE(pt_iommu_, PTPFX), init)
int pt_iommu_init(struct pt_iommu_table *fmt_table,
		  const struct pt_iommu_table_cfg *cfg, gfp_t gfp);

/* The format can provide a list of configurations it would like to test */
#ifdef kunit_fmt_cfgs
static const void *kunit_pt_gen_params_cfg(struct kunit *test, const void *prev,
					   char *desc)
{
	uintptr_t cfg_id = (uintptr_t)prev;

	cfg_id++;
	if (cfg_id >= ARRAY_SIZE(kunit_fmt_cfgs) + 1)
		return NULL;
	snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s_cfg_%u",
		 __stringify(PTPFX_RAW), (unsigned int)(cfg_id - 1));
	return (void *)cfg_id;
}
#define KUNIT_CASE_FMT(test_name) \
	KUNIT_CASE_PARAM(test_name, kunit_pt_gen_params_cfg)
#else
#define KUNIT_CASE_FMT(test_name) KUNIT_CASE(test_name)
#endif
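/*
 * For illustration only: a format that wants several configurations exercised
 * would define kunit_fmt_cfgs before including this header, along these
 * lines. The initializers below are hypothetical; only the
 * cfg.common.hw_max_vasz_lg2 style of field used later in this header is
 * assumed.
 *
 *	static const struct pt_iommu_table_cfg kunit_fmt_cfgs[] = {
 *		{ .common = { .hw_max_vasz_lg2 = 39 } },
 *		{ .common = { .hw_max_vasz_lg2 = 48 } },
 *	};
 *	#define kunit_fmt_cfgs kunit_fmt_cfgs
 */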

#define KUNIT_ASSERT_NO_ERRNO(test, ret) \
	KUNIT_ASSERT_EQ_MSG(test, ret, 0, KUNIT_SUBSUBTEST_INDENT "errno %pe", \
			    ERR_PTR(ret))

#define KUNIT_ASSERT_NO_ERRNO_FN(test, fn, ret) \
	KUNIT_ASSERT_EQ_MSG(test, ret, 0, \
			    KUNIT_SUBSUBTEST_INDENT "errno %pe from %s", \
			    ERR_PTR(ret), fn)

/*
 * When the test is run on a 32 bit system unsigned long can be 32 bits. This
 * causes the iommu op signatures to be restricted to 32 bits, meaning the test
 * has to be mindful not to create any VAs over the 32 bit limit. Reduce the
 * scope of the testing, as the main purpose of checking on full 32 bit is to
 * look for 32-bit-isms in the core code. Run the test on i386 with X86_PAE=y
 * to get the full coverage when dma_addr_t & phys_addr_t are 8 bytes.
 */
#define IS_32BIT (sizeof(unsigned long) == 4)

struct kunit_iommu_priv {
	union {
		struct iommu_domain domain;
		struct pt_iommu_table fmt_table;
	};
	spinlock_t top_lock;
	struct device *dummy_dev;
	struct pt_iommu *iommu;
	struct pt_common *common;
	struct pt_iommu_table_cfg cfg;
	struct pt_iommu_info info;
	unsigned int smallest_pgsz_lg2;
	pt_vaddr_t smallest_pgsz;
	unsigned int largest_pgsz_lg2;
	pt_oaddr_t test_oa;
	pt_vaddr_t safe_pgsize_bitmap;
	unsigned long orig_nr_secondary_pagetable;
};
PT_IOMMU_CHECK_DOMAIN(struct kunit_iommu_priv, fmt_table.iommu, domain);

static void pt_kunit_iotlb_sync(struct iommu_domain *domain,
				struct iommu_iotlb_gather *gather)
{
	iommu_put_pages_list(&gather->freelist);
}

#define IOMMU_PT_DOMAIN_OPS1(x) IOMMU_PT_DOMAIN_OPS(x)
static const struct iommu_domain_ops kunit_pt_ops = {
	IOMMU_PT_DOMAIN_OPS1(PTPFX_RAW),
	.iotlb_sync = &pt_kunit_iotlb_sync,
};

static void pt_kunit_change_top(struct pt_iommu *iommu_table,
				phys_addr_t top_paddr, unsigned int top_level)
{
}

static spinlock_t *pt_kunit_get_top_lock(struct pt_iommu *iommu_table)
{
	struct kunit_iommu_priv *priv = container_of(
		iommu_table, struct kunit_iommu_priv, fmt_table.iommu);

	return &priv->top_lock;
}

static const struct pt_iommu_driver_ops pt_kunit_driver_ops = {
	.change_top = &pt_kunit_change_top,
	.get_top_lock = &pt_kunit_get_top_lock,
};

static int pt_kunit_priv_init(struct kunit *test, struct kunit_iommu_priv *priv)
{
	unsigned int va_lg2sz;
	int ret;

	/* Enough so the memory allocator works */
	priv->dummy_dev = kunit_device_register(test, "pt_kunit_dev");
	if (IS_ERR(priv->dummy_dev))
		return PTR_ERR(priv->dummy_dev);
	set_dev_node(priv->dummy_dev, NUMA_NO_NODE);

	spin_lock_init(&priv->top_lock);

#ifdef kunit_fmt_cfgs
	priv->cfg = kunit_fmt_cfgs[((uintptr_t)test->param_value) - 1];
	/*
	 * The format can set a list of features that the kunit_fmt_cfgs
	 * controls; other features default to on.
	 */
	priv->cfg.common.features |= PT_SUPPORTED_FEATURES &
				     (~KUNIT_FMT_FEATURES);
#else
	priv->cfg.common.features = PT_SUPPORTED_FEATURES;
#endif

	/* Defaults, for the kunit */
	if (!priv->cfg.common.hw_max_vasz_lg2)
		priv->cfg.common.hw_max_vasz_lg2 = PT_MAX_VA_ADDRESS_LG2;
	if (!priv->cfg.common.hw_max_oasz_lg2)
		priv->cfg.common.hw_max_oasz_lg2 = pt_max_oa_lg2(NULL);

	priv->fmt_table.iommu.nid = NUMA_NO_NODE;
	priv->fmt_table.iommu.driver_ops = &pt_kunit_driver_ops;
	priv->fmt_table.iommu.iommu_device = priv->dummy_dev;
	priv->domain.ops = &kunit_pt_ops;
	ret = pt_iommu_init(&priv->fmt_table, &priv->cfg, GFP_KERNEL);
	if (ret) {
		if (ret == -EOVERFLOW)
			kunit_skip(
				test,
				"This configuration cannot be tested on 32 bit");
		return ret;
	}

	priv->iommu = &priv->fmt_table.iommu;
	priv->common = common_from_iommu(&priv->fmt_table.iommu);
	priv->iommu->ops->get_info(priv->iommu, &priv->info);

	/*
	 * size_t is used to pass the mapping length and it can be 32 bit;
	 * truncate the pagesizes so we don't use large sizes.
	 */
	priv->info.pgsize_bitmap = (size_t)priv->info.pgsize_bitmap;

	priv->smallest_pgsz_lg2 = vaffs(priv->info.pgsize_bitmap);
	priv->smallest_pgsz = log2_to_int(priv->smallest_pgsz_lg2);
	priv->largest_pgsz_lg2 =
		vafls((dma_addr_t)priv->info.pgsize_bitmap) - 1;

	priv->test_oa =
		oalog2_mod(0x74a71445deadbeef, priv->common->max_oasz_lg2);

	/*
	 * We run out of VA space if the mappings get too big; make something
	 * smaller that can safely pass through the dma_addr_t API.
	 */
	va_lg2sz = priv->common->max_vasz_lg2;
	if (IS_32BIT && va_lg2sz > 32)
		va_lg2sz = 32;
	priv->safe_pgsize_bitmap =
		log2_mod(priv->info.pgsize_bitmap, va_lg2sz - 1);

	return 0;
}

#endif

@@ -0,0 +1,487 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
 */
#include "kunit_iommu.h"
#include "pt_iter.h"
#include <linux/generic_pt/iommu.h>
#include <linux/iommu.h>

static void do_map(struct kunit *test, pt_vaddr_t va, pt_oaddr_t pa,
		   pt_vaddr_t len);

struct count_valids {
	u64 per_size[PT_VADDR_MAX_LG2];
};

static int __count_valids(struct pt_range *range, void *arg, unsigned int level,
			  struct pt_table_p *table)
{
	struct pt_state pts = pt_init(range, level, table);
	struct count_valids *valids = arg;

	for_each_pt_level_entry(&pts) {
		if (pts.type == PT_ENTRY_TABLE) {
			pt_descend(&pts, arg, __count_valids);
			continue;
		}
		if (pts.type == PT_ENTRY_OA) {
			valids->per_size[pt_entry_oa_lg2sz(&pts)]++;
			continue;
		}
	}
	return 0;
}

/*
 * Number of valid table entries. This counts contiguous entries as a single
 * valid entry.
 */
static unsigned int count_valids(struct kunit *test)
{
	struct kunit_iommu_priv *priv = test->priv;
	struct pt_range range = pt_top_range(priv->common);
	struct count_valids valids = {};
	u64 total = 0;
	unsigned int i;

	KUNIT_ASSERT_NO_ERRNO(test,
			      pt_walk_range(&range, __count_valids, &valids));

	for (i = 0; i != ARRAY_SIZE(valids.per_size); i++)
		total += valids.per_size[i];
	return total;
}

/* Only a single page size is present, count the number of valid entries */
static unsigned int count_valids_single(struct kunit *test, pt_vaddr_t pgsz)
{
	struct kunit_iommu_priv *priv = test->priv;
	struct pt_range range = pt_top_range(priv->common);
	struct count_valids valids = {};
	u64 total = 0;
	unsigned int i;

	KUNIT_ASSERT_NO_ERRNO(test,
			      pt_walk_range(&range, __count_valids, &valids));

	for (i = 0; i != ARRAY_SIZE(valids.per_size); i++) {
		if ((1ULL << i) == pgsz)
			total = valids.per_size[i];
		else
			KUNIT_ASSERT_EQ(test, valids.per_size[i], 0);
	}
	return total;
}

static void do_unmap(struct kunit *test, pt_vaddr_t va, pt_vaddr_t len)
{
	struct kunit_iommu_priv *priv = test->priv;
	size_t ret;

	ret = iommu_unmap(&priv->domain, va, len);
	KUNIT_ASSERT_EQ(test, ret, len);
}

static void check_iova(struct kunit *test, pt_vaddr_t va, pt_oaddr_t pa,
		       pt_vaddr_t len)
{
	struct kunit_iommu_priv *priv = test->priv;
	pt_vaddr_t pfn = log2_div(va, priv->smallest_pgsz_lg2);
	pt_vaddr_t end_pfn = pfn + log2_div(len, priv->smallest_pgsz_lg2);

	for (; pfn != end_pfn; pfn++) {
		phys_addr_t res = iommu_iova_to_phys(&priv->domain,
						     pfn * priv->smallest_pgsz);

		KUNIT_ASSERT_EQ(test, res, (phys_addr_t)pa);
		if (res != pa)
			break;
		pa += priv->smallest_pgsz;
	}
}

static void test_increase_level(struct kunit *test)
{
	struct kunit_iommu_priv *priv = test->priv;
	struct pt_common *common = priv->common;

	if (!pt_feature(common, PT_FEAT_DYNAMIC_TOP))
		kunit_skip(test, "PT_FEAT_DYNAMIC_TOP not set for this format");

	if (IS_32BIT)
		kunit_skip(test, "Unable to test on 32bit");

	KUNIT_ASSERT_GT(test, common->max_vasz_lg2,
			pt_top_range(common).max_vasz_lg2);

	/* Add every possible level to the max */
	while (common->max_vasz_lg2 != pt_top_range(common).max_vasz_lg2) {
		struct pt_range top_range = pt_top_range(common);

		if (top_range.va == 0)
			do_map(test, top_range.last_va + 1, 0,
			       priv->smallest_pgsz);
		else
			do_map(test, top_range.va - priv->smallest_pgsz, 0,
			       priv->smallest_pgsz);

		KUNIT_ASSERT_EQ(test, pt_top_range(common).top_level,
				top_range.top_level + 1);
		KUNIT_ASSERT_GE(test, common->max_vasz_lg2,
				pt_top_range(common).max_vasz_lg2);
	}
}

static void test_map_simple(struct kunit *test)
{
	struct kunit_iommu_priv *priv = test->priv;
	struct pt_range range = pt_top_range(priv->common);
	struct count_valids valids = {};
	pt_vaddr_t pgsize_bitmap = priv->safe_pgsize_bitmap;
	unsigned int pgsz_lg2;
	pt_vaddr_t cur_va;

	/* Map every reported page size */
	cur_va = range.va + priv->smallest_pgsz * 256;
	for (pgsz_lg2 = 0; pgsz_lg2 != PT_VADDR_MAX_LG2; pgsz_lg2++) {
		pt_oaddr_t paddr = log2_set_mod(priv->test_oa, 0, pgsz_lg2);
		u64 len = log2_to_int(pgsz_lg2);

		if (!(pgsize_bitmap & len))
			continue;

		cur_va = ALIGN(cur_va, len);
		do_map(test, cur_va, paddr, len);
		if (len <= SZ_2G)
			check_iova(test, cur_va, paddr, len);
		cur_va += len;
	}

	/* The read interface reports that every page size was created */
	range = pt_top_range(priv->common);
	KUNIT_ASSERT_NO_ERRNO(test,
			      pt_walk_range(&range, __count_valids, &valids));
	for (pgsz_lg2 = 0; pgsz_lg2 != PT_VADDR_MAX_LG2; pgsz_lg2++) {
		if (pgsize_bitmap & (1ULL << pgsz_lg2))
			KUNIT_ASSERT_EQ(test, valids.per_size[pgsz_lg2], 1);
		else
			KUNIT_ASSERT_EQ(test, valids.per_size[pgsz_lg2], 0);
	}

	/* Unmap works */
	range = pt_top_range(priv->common);
	cur_va = range.va + priv->smallest_pgsz * 256;
	for (pgsz_lg2 = 0; pgsz_lg2 != PT_VADDR_MAX_LG2; pgsz_lg2++) {
		u64 len = log2_to_int(pgsz_lg2);

		if (!(pgsize_bitmap & len))
			continue;
		cur_va = ALIGN(cur_va, len);
		do_unmap(test, cur_va, len);
		cur_va += len;
	}
	KUNIT_ASSERT_EQ(test, count_valids(test), 0);
}

/*
 * Test to convert a table pointer into an OA by mapping something small,
 * unmapping it so as to leave behind a table pointer, then mapping something
 * larger that will convert the table into an OA.
 */
static void test_map_table_to_oa(struct kunit *test)
{
	struct kunit_iommu_priv *priv = test->priv;
	pt_vaddr_t limited_pgbitmap =
		priv->info.pgsize_bitmap % (IS_32BIT ? SZ_2G : SZ_16G);
	struct pt_range range = pt_top_range(priv->common);
	unsigned int pgsz_lg2;
	pt_vaddr_t max_pgsize;
	pt_vaddr_t cur_va;

	max_pgsize = 1ULL << (vafls(limited_pgbitmap) - 1);
	KUNIT_ASSERT_TRUE(test, priv->info.pgsize_bitmap & max_pgsize);

	for (pgsz_lg2 = 0; pgsz_lg2 != PT_VADDR_MAX_LG2; pgsz_lg2++) {
		pt_oaddr_t paddr = log2_set_mod(priv->test_oa, 0, pgsz_lg2);
		u64 len = log2_to_int(pgsz_lg2);
		pt_vaddr_t offset;

		if (!(priv->info.pgsize_bitmap & len))
			continue;
		if (len > max_pgsize)
			break;

		cur_va = ALIGN(range.va + priv->smallest_pgsz * 256,
			       max_pgsize);
		for (offset = 0; offset != max_pgsize; offset += len)
			do_map(test, cur_va + offset, paddr + offset, len);
		check_iova(test, cur_va, paddr, max_pgsize);
		KUNIT_ASSERT_EQ(test, count_valids_single(test, len),
				log2_div(max_pgsize, pgsz_lg2));

		if (len == max_pgsize) {
			do_unmap(test, cur_va, max_pgsize);
		} else {
			do_unmap(test, cur_va, max_pgsize / 2);
			for (offset = max_pgsize / 2; offset != max_pgsize;
			     offset += len)
				do_unmap(test, cur_va + offset, len);
		}

		KUNIT_ASSERT_EQ(test, count_valids(test), 0);
	}
}

/*
 * Test unmapping a small page at the start of a large page. This always unmaps
 * the large page.
 */
static void test_unmap_split(struct kunit *test)
{
	struct kunit_iommu_priv *priv = test->priv;
	struct pt_range top_range = pt_top_range(priv->common);
	pt_vaddr_t pgsize_bitmap = priv->safe_pgsize_bitmap;
	unsigned int pgsz_lg2;
	unsigned int count = 0;

	for (pgsz_lg2 = 0; pgsz_lg2 != PT_VADDR_MAX_LG2; pgsz_lg2++) {
		pt_vaddr_t base_len = log2_to_int(pgsz_lg2);
		unsigned int next_pgsz_lg2;

		if (!(pgsize_bitmap & base_len))
			continue;

		for (next_pgsz_lg2 = pgsz_lg2 + 1;
		     next_pgsz_lg2 != PT_VADDR_MAX_LG2; next_pgsz_lg2++) {
			pt_vaddr_t next_len = log2_to_int(next_pgsz_lg2);
			pt_vaddr_t vaddr = top_range.va;
			pt_oaddr_t paddr = 0;
			size_t gnmapped;

			if (!(pgsize_bitmap & next_len))
				continue;

			do_map(test, vaddr, paddr, next_len);
			gnmapped = iommu_unmap(&priv->domain, vaddr, base_len);
			KUNIT_ASSERT_EQ(test, gnmapped, next_len);

			/* Make sure unmap doesn't keep going */
			do_map(test, vaddr, paddr, next_len);
			do_map(test, vaddr + next_len, paddr, next_len);
			gnmapped = iommu_unmap(&priv->domain, vaddr, base_len);
			KUNIT_ASSERT_EQ(test, gnmapped, next_len);
			gnmapped = iommu_unmap(&priv->domain, vaddr + next_len,
					       next_len);
			KUNIT_ASSERT_EQ(test, gnmapped, next_len);

			count++;
		}
	}

	if (count == 0)
		kunit_skip(test, "Test needs two page sizes");
}

static void unmap_collisions(struct kunit *test, struct maple_tree *mt,
			     pt_vaddr_t start, pt_vaddr_t last)
{
	struct kunit_iommu_priv *priv = test->priv;
	MA_STATE(mas, mt, start, last);
	void *entry;

	mtree_lock(mt);
	mas_for_each(&mas, entry, last) {
		pt_vaddr_t mas_start = mas.index;
		pt_vaddr_t len = (mas.last - mas_start) + 1;
		pt_oaddr_t paddr;

		mas_erase(&mas);
		mas_pause(&mas);
		mtree_unlock(mt);

		paddr = oalog2_mod(mas_start, priv->common->max_oasz_lg2);
		check_iova(test, mas_start, paddr, len);
		do_unmap(test, mas_start, len);
		mtree_lock(mt);
	}
	mtree_unlock(mt);
}

static void clamp_range(struct kunit *test, struct pt_range *range)
{
	struct kunit_iommu_priv *priv = test->priv;

	if (range->last_va - range->va > SZ_1G)
		range->last_va = range->va + SZ_1G;
	KUNIT_ASSERT_NE(test, range->last_va, PT_VADDR_MAX);
	if (range->va <= MAPLE_RESERVED_RANGE)
		range->va =
			ALIGN(MAPLE_RESERVED_RANGE, priv->smallest_pgsz);
}

/*
 * Randomly map and unmap ranges that can use large physical pages. If a random
 * range overlaps with existing ranges then unmap them. This hits all the
 * special cases.
 */
static void test_random_map(struct kunit *test)
{
	struct kunit_iommu_priv *priv = test->priv;
	struct pt_range upper_range = pt_upper_range(priv->common);
	struct pt_range top_range = pt_top_range(priv->common);
	struct maple_tree mt;
	unsigned int iter;

	mt_init(&mt);

	/*
	 * Shrink the range so randomization is more likely to have
	 * intersections
	 */
	clamp_range(test, &top_range);
	clamp_range(test, &upper_range);

	for (iter = 0; iter != 1000; iter++) {
		struct pt_range *range = &top_range;
		pt_oaddr_t paddr;
		pt_vaddr_t start;
		pt_vaddr_t end;
		int ret;

		if (pt_feature(priv->common, PT_FEAT_SIGN_EXTEND) &&
		    ULONG_MAX >= PT_VADDR_MAX && get_random_u32_inclusive(0, 1))
			range = &upper_range;

		start = get_random_u32_below(
			min(U32_MAX, range->last_va - range->va));
		end = get_random_u32_below(
			min(U32_MAX, range->last_va - start));

		start = ALIGN_DOWN(start, priv->smallest_pgsz);
		end = ALIGN(end, priv->smallest_pgsz);
		start += range->va;
		end += start;
		if (start < range->va || end > range->last_va + 1 ||
		    start >= end)
			continue;

		/* Try overmapping to test the failure handling */
		paddr = oalog2_mod(start, priv->common->max_oasz_lg2);
		ret = iommu_map(&priv->domain, start, paddr, end - start,
				IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
		if (ret) {
			KUNIT_ASSERT_EQ(test, ret, -EADDRINUSE);
			unmap_collisions(test, &mt, start, end - 1);
			do_map(test, start, paddr, end - start);
		}

		KUNIT_ASSERT_NO_ERRNO_FN(test, "mtree_insert_range",
					 mtree_insert_range(&mt, start, end - 1,
							    XA_ZERO_ENTRY,
							    GFP_KERNEL));

		check_iova(test, start, paddr, end - start);
		if (iter % 100)
			cond_resched();
	}

	unmap_collisions(test, &mt, 0, PT_VADDR_MAX);
	KUNIT_ASSERT_EQ(test, count_valids(test), 0);

	mtree_destroy(&mt);
}

/* See https://lore.kernel.org/r/b9b18a03-63a2-4065-a27e-d92dd5c860bc@amd.com */
static void test_pgsize_boundary(struct kunit *test)
{
	struct kunit_iommu_priv *priv = test->priv;
	struct pt_range top_range = pt_top_range(priv->common);

	if (top_range.va != 0 || top_range.last_va < 0xfef9ffff ||
	    priv->smallest_pgsz != SZ_4K)
		kunit_skip(test, "Format does not have the required range");

	do_map(test, 0xfef80000, 0x208b95d000, 0xfef9ffff - 0xfef80000 + 1);
}

/* See https://lore.kernel.org/r/20250826143816.38686-1-eugkoira@amazon.com */
static void test_mixed(struct kunit *test)
{
	struct kunit_iommu_priv *priv = test->priv;
	struct pt_range top_range = pt_top_range(priv->common);
	u64 start = 0x3fe400ULL << 12;
	u64 end = 0x4c0600ULL << 12;
	pt_vaddr_t len = end - start;
	pt_oaddr_t oa = start;

	if (top_range.last_va <= start || sizeof(unsigned long) == 4)
		kunit_skip(test, "range is too small");
	if ((priv->safe_pgsize_bitmap & GENMASK(30, 21)) != (BIT(30) | BIT(21)))
		kunit_skip(test, "incompatible psize");

	do_map(test, start, oa, len);
	/* 14 2M, 3 1G, 3 2M */
	KUNIT_ASSERT_EQ(test, count_valids(test), 20);
	check_iova(test, start, oa, len);
}

static struct kunit_case iommu_test_cases[] = {
	KUNIT_CASE_FMT(test_increase_level),
	KUNIT_CASE_FMT(test_map_simple),
	KUNIT_CASE_FMT(test_map_table_to_oa),
	KUNIT_CASE_FMT(test_unmap_split),
	KUNIT_CASE_FMT(test_random_map),
	KUNIT_CASE_FMT(test_pgsize_boundary),
	KUNIT_CASE_FMT(test_mixed),
	{},
};

static int pt_kunit_iommu_init(struct kunit *test)
{
	struct kunit_iommu_priv *priv;
	int ret;

	priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	priv->orig_nr_secondary_pagetable =
		global_node_page_state(NR_SECONDARY_PAGETABLE);
	ret = pt_kunit_priv_init(test, priv);
	if (ret) {
		kunit_kfree(test, priv);
		return ret;
	}
	test->priv = priv;
	return 0;
}

static void pt_kunit_iommu_exit(struct kunit *test)
{
	struct kunit_iommu_priv *priv = test->priv;

	if (!test->priv)
		return;

	pt_iommu_deinit(priv->iommu);
	/*
	 * Look for memory leaks, assumes kunit is running isolated and nothing
	 * else is using secondary page tables.
	 */
	KUNIT_ASSERT_EQ(test, priv->orig_nr_secondary_pagetable,
			global_node_page_state(NR_SECONDARY_PAGETABLE));
	kunit_kfree(test, test->priv);
}

static struct kunit_suite NS(iommu_suite) = {
	.name = __stringify(NS(iommu_test)),
	.init = pt_kunit_iommu_init,
	.exit = pt_kunit_iommu_exit,
	.test_cases = iommu_test_cases,
};
kunit_test_suites(&NS(iommu_suite));

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Kunit for generic page table");
MODULE_IMPORT_NS("GENERIC_PT_IOMMU");

@@ -0,0 +1,389 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 *
 * This header is included after the format. It contains definitions
 * that build on the format definitions to create the basic format API.
 *
 * The format API is listed here, with kdocs. The functions without bodies are
 * implemented in the format using the pattern:
 *   static inline FMTpt_XXX(..) {..}
 *   #define pt_XXX FMTpt_XXX
 *
 * If the format doesn't implement a function then pt_fmt_defaults.h can provide
 * a generic version.
 *
 * The routines marked "@pts: Entry to query" operate on the entire contiguous
 * entry and can be called with a pts->index pointing to any sub item that makes
 * up that entry.
 *
 * The header order is:
 *   pt_defs.h
 *   FMT.h
 *   pt_common.h
 */
#ifndef __GENERIC_PT_PT_COMMON_H
#define __GENERIC_PT_PT_COMMON_H

#include "pt_defs.h"
#include "pt_fmt_defaults.h"

/**
 * pt_attr_from_entry() - Convert the permission bits back to attrs
 * @pts: Entry to convert from
 * @attrs: Resulting attrs
 *
 * Fill in the attrs with the permission bits encoded in the current leaf entry.
 * The attrs should be usable with pt_install_leaf_entry() to reconstruct the
 * same entry.
 */
static inline void pt_attr_from_entry(const struct pt_state *pts,
				      struct pt_write_attrs *attrs);

/**
 * pt_can_have_leaf() - True if the current level can have an OA entry
 * @pts: The current level
 *
 * True if the current level can support pt_install_leaf_entry(). A leaf
 * entry produces an OA.
 */
static inline bool pt_can_have_leaf(const struct pt_state *pts);

/**
 * pt_can_have_table() - True if the current level can have a lower table
 * @pts: The current level
 *
 * Every level except 0 is allowed to have a lower table.
 */
static inline bool pt_can_have_table(const struct pt_state *pts)
{
	/* No further tables at level 0 */
	return pts->level > 0;
}

/**
 * pt_clear_entries() - Make entries empty (non-present)
 * @pts: Starting table index
 * @num_contig_lg2: Number of contiguous items to clear
 *
 * Clear a run of entries. A cleared entry will load back as PT_ENTRY_EMPTY
 * and does not have any effect on table walking. The starting index must be
 * aligned to num_contig_lg2.
 */
static inline void pt_clear_entries(struct pt_state *pts,
				    unsigned int num_contig_lg2);

/**
 * pt_entry_make_write_dirty() - Make an entry dirty
 * @pts: Table entry to change
 *
 * Make pt_entry_is_write_dirty() return true for this entry. This can be called
 * asynchronously with any other table manipulation under a RCU lock and must
 * not corrupt the table.
 */
static inline bool pt_entry_make_write_dirty(struct pt_state *pts);

/**
 * pt_entry_make_write_clean() - Make the entry write clean
 * @pts: Table entry to change
 *
 * Modify the entry so that pt_entry_is_write_dirty() == false. The HW will
 * eventually be notified of this change via a TLB flush, which is the point
 * that the HW must become synchronized. Any "write dirty" prior to the TLB
 * flush can be lost, but once the TLB flush completes all writes must make
 * their entries write dirty.
 *
 * The format should alter the entry in a way that is compatible with any
 * concurrent update from HW. The entire contiguous entry is changed.
 */
static inline void pt_entry_make_write_clean(struct pt_state *pts);

/**
 * pt_entry_is_write_dirty() - True if the entry has been written to
 * @pts: Entry to query
 *
 * "write dirty" means that the HW has written to the OA translated
 * by this entry. If the entry is contiguous then the consolidated
 * "write dirty" for all the items must be returned.
 */
static inline bool pt_entry_is_write_dirty(const struct pt_state *pts);

/**
 * pt_dirty_supported() - True if the page table supports dirty tracking
 * @common: Page table to query
 */
static inline bool pt_dirty_supported(struct pt_common *common);

/**
 * pt_entry_num_contig_lg2() - Number of contiguous items for this leaf entry
 * @pts: Entry to query
 *
 * Return the number of contiguous items this leaf entry spans. If the entry
 * is a single item it returns ilog2(1).
 */
static inline unsigned int pt_entry_num_contig_lg2(const struct pt_state *pts);

/**
 * pt_entry_oa() - Output Address for this leaf entry
 * @pts: Entry to query
 *
 * Return the output address for the start of the entry. If the entry
 * is contiguous this returns the same value for each sub-item. I.e.::
 *
 *  log2_mod(pt_entry_oa(), pt_entry_oa_lg2sz()) == 0
 *
 * See pt_item_oa(). The format should implement one of these two functions
 * depending on how it stores the OAs in the table.
 */
static inline pt_oaddr_t pt_entry_oa(const struct pt_state *pts);

/**
 * pt_entry_oa_lg2sz() - Return the size of an OA entry
 * @pts: Entry to query
 *
 * If the entry is not contiguous this returns pt_table_item_lg2sz(), otherwise
 * it returns the total VA/OA size of the entire contiguous entry.
 */
static inline unsigned int pt_entry_oa_lg2sz(const struct pt_state *pts)
{
	return pt_entry_num_contig_lg2(pts) + pt_table_item_lg2sz(pts);
}

/**
 * pt_entry_oa_exact() - Return the complete OA for an entry
 * @pts: Entry to query
 *
 * During iteration the first entry could have a VA with an offset from the
 * natural start of the entry. Return the exact OA including the pts's VA
 * offset.
 */
static inline pt_oaddr_t pt_entry_oa_exact(const struct pt_state *pts)
{
	return _pt_entry_oa_fast(pts) |
	       log2_mod(pts->range->va, pt_entry_oa_lg2sz(pts));
}

/**
 * pt_full_va_prefix() - The top bits of the VA
 * @common: Page table to query
 *
 * This is usually 0, but some formats have their VA space going downward from
 * PT_VADDR_MAX, and will return that instead. This value must always be
 * adjusted by struct pt_common max_vasz_lg2.
 */
static inline pt_vaddr_t pt_full_va_prefix(const struct pt_common *common);

/**
 * pt_has_system_page_size() - True if level 0 can install a PAGE_SHIFT entry
 * @common: Page table to query
 *
 * If true the caller can use pt_install_leaf_entry(PAGE_SHIFT) at level 0.
 * This is useful to create optimized paths for common cases of PAGE_SIZE
 * mappings.
 */
static inline bool pt_has_system_page_size(const struct pt_common *common);

/**
 * pt_install_leaf_entry() - Write a leaf entry to the table
 * @pts: Table index to change
 * @oa: Output Address for this leaf
 * @oasz_lg2: Size in VA/OA for this leaf
 * @attrs: Attributes to modify the entry
 *
 * A leaf OA entry will return PT_ENTRY_OA from pt_load_entry(). It translates
 * the VA indicated by pts to the given OA.
 *
 * For a single item non-contiguous entry oasz_lg2 is pt_table_item_lg2sz().
 * For contiguous it is pt_table_item_lg2sz() + num_contig_lg2.
 *
 * This must not be called if pt_can_have_leaf() == false. Contiguous sizes
 * not indicated by pt_possible_sizes() must not be specified.
 */
static inline void pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
					 unsigned int oasz_lg2,
					 const struct pt_write_attrs *attrs);

/**
 * pt_install_table() - Write a table entry to the table
 * @pts: Table index to change
 * @table_pa: CPU physical address of the lower table's memory
 * @attrs: Attributes to modify the table index
 *
 * A table entry will return PT_ENTRY_TABLE from pt_load_entry(). The table_pa
 * is the table at pts->level - 1. This is done by cmpxchg so pts must have the
 * current entry loaded. The pts is updated with the installed entry.
 *
 * This must not be called if pt_can_have_table() == false.
 *
 * Returns: true if the table was installed successfully.
 */
static inline bool pt_install_table(struct pt_state *pts, pt_oaddr_t table_pa,
				    const struct pt_write_attrs *attrs);
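/*
 * Sketch of the intended calling pattern, not a definition from this API:
 * because the install is a cmpxchg against the entry currently loaded in the
 * pts, a caller that loses the race is expected to free what it allocated and
 * reload rather than assume success::
 *
 *	pt_load_entry(pts);
 *	if (pts->type == PT_ENTRY_EMPTY &&
 *	    !pt_install_table(pts, table_pa, &attrs)) {
 *		free_the_new_table();	// hypothetical cleanup helper
 *		pt_load_entry(pts);	// see what the winner installed
 *	}
 *
 * table_pa/attrs stand for whatever the caller allocated and built beforehand.
 */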

/**
 * pt_item_oa() - Output Address for this leaf item
 * @pts: Item to query
 *
 * Return the output address for this item. If the item is part of a contiguous
 * entry it returns the value of the OA for this individual sub item.
 *
 * See pt_entry_oa(). The format should implement one of these two functions
 * depending on how it stores the OAs in the table.
 */
static inline pt_oaddr_t pt_item_oa(const struct pt_state *pts);

/**
 * pt_load_entry_raw() - Read from the location pts points at into the pts
 * @pts: Table index to load
 *
 * Return the type of entry that was loaded. pts->entry will be filled in with
 * the entry's content. See pt_load_entry()
 */
static inline enum pt_entry_type pt_load_entry_raw(struct pt_state *pts);

/**
 * pt_max_oa_lg2() - Return the maximum OA the table format can hold
 * @common: Page table to query
 *
 * The value oalog2_to_max_int(pt_max_oa_lg2()) is the MAX for the
 * OA. This is the absolute maximum address the table can hold. struct pt_common
 * max_oasz_lg2 sets a lower dynamic maximum based on HW capability.
 */
static inline unsigned int
pt_max_oa_lg2(const struct pt_common *common);

/**
 * pt_num_items_lg2() - Return the number of items in this table level
 * @pts: The current level
 *
 * The number of items in a table level defines the number of bits this level
 * decodes from the VA. This function is not called for the top level,
 * so it does not need to compute a special value for the top case. The
 * result for the top is based on pt_common max_vasz_lg2.
 *
 * The value is used as part of determining the table indexes via the
 * equation::
 *
 *  log2_mod(log2_div(VA, pt_table_item_lg2sz()), pt_num_items_lg2())
 */
static inline unsigned int pt_num_items_lg2(const struct pt_state *pts);
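/*
 * Worked example with made-up numbers: for a level with 4KiB items
 * (pt_table_item_lg2sz() == 12) and 512 items per table
 * (pt_num_items_lg2() == ilog2(512) == 9), VA 0x00403000 gives::
 *
 *	log2_div(0x00403000, 12) == 0x403
 *	log2_mod(0x403, 9)       == 3
 *
 * so the VA selects index 3 in that level's table. Real formats supply their
 * own constants.
 */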

/**
 * pt_pgsz_lg2_to_level() - Return the level that maps the page size
 * @common: Page table to query
 * @pgsize_lg2: Log2 page size
 *
 * Returns the table level that will map the given page size. The page
 * size must be part of the pt_possible_sizes() for some level.
 */
static inline unsigned int pt_pgsz_lg2_to_level(struct pt_common *common,
						unsigned int pgsize_lg2);

/**
 * pt_possible_sizes() - Return a bitmap of possible output sizes at this level
 * @pts: The current level
 *
 * Each level has a list of possible output sizes that can be installed as
 * leaf entries. If pt_can_have_leaf() is false returns zero.
 *
 * Otherwise the bit in position pt_table_item_lg2sz() should be set indicating
 * that a non-contiguous single item leaf entry is supported. The following
 * pt_num_items_lg2() number of bits can be set indicating contiguous entries
 * are supported. Bit pt_table_item_lg2sz() + pt_num_items_lg2() must not be
 * set; contiguous entries cannot span the entire table.
 *
 * The OR of pt_possible_sizes() of all levels is the typical bitmask of all
 * supported sizes in the entire table.
 */
static inline pt_vaddr_t pt_possible_sizes(const struct pt_state *pts);
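/*
 * For illustration only: a level with a 2MiB item size that also supports
 * 16-item contiguous entries might return BIT(21) | BIT(25), i.e. 2MiB and
 * 32MiB leaf sizes, while a level that cannot hold leaves returns 0. The
 * exact bits are entirely format specific.
 */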

/**
 * pt_table_item_lg2sz() - Size of a single item entry in this table level
 * @pts: The current level
 *
 * The size of the item specifies how much VA and OA a single item occupies.
 *
 * See pt_entry_oa_lg2sz() for the same value including the effect of contiguous
 * entries.
 */
static inline unsigned int pt_table_item_lg2sz(const struct pt_state *pts);

/**
 * pt_table_oa_lg2sz() - Return the VA/OA size of the entire table
 * @pts: The current level
 *
 * Return the size of VA decoded by the entire table level.
 */
static inline unsigned int pt_table_oa_lg2sz(const struct pt_state *pts)
{
	if (pts->range->top_level == pts->level)
		return pts->range->max_vasz_lg2;
	return min_t(unsigned int, pts->range->common->max_vasz_lg2,
		     pt_num_items_lg2(pts) + pt_table_item_lg2sz(pts));
}

/**
 * pt_table_pa() - Return the CPU physical address of the table entry
 * @pts: Entry to query
 *
 * This is only ever called on PT_ENTRY_TABLE entries. Must return the same
 * value passed to pt_install_table().
 */
static inline pt_oaddr_t pt_table_pa(const struct pt_state *pts);

/**
 * pt_table_ptr() - Return a CPU pointer for a table item
 * @pts: Entry to query
 *
 * Same as pt_table_pa() but returns a CPU pointer.
 */
static inline struct pt_table_p *pt_table_ptr(const struct pt_state *pts)
{
	return __va(pt_table_pa(pts));
}

/**
 * pt_max_sw_bit() - Return the maximum software bit usable for any level and entry
 * @common: Page table
 *
 * The sw bit can be passed as bitnr to the other sw_bit functions.
 */
static inline unsigned int pt_max_sw_bit(struct pt_common *common);

/**
 * pt_test_sw_bit_acquire() - Read a software bit in an item
 * @pts: Entry to read
 * @bitnr: Bit to read
 *
 * Software bits are ignored by HW and can be used for any purpose by the
 * software. This does a test bit and acquire operation.
 */
static inline bool pt_test_sw_bit_acquire(struct pt_state *pts,
					  unsigned int bitnr);

/**
 * pt_set_sw_bit_release() - Set a software bit in an item
 * @pts: Entry to set
 * @bitnr: Bit to set
 *
 * Software bits are ignored by HW and can be used for any purpose by the
 * software. This does a set bit and release operation.
 */
static inline void pt_set_sw_bit_release(struct pt_state *pts,
					 unsigned int bitnr);
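/*
 * Illustrative pairing, not part of the format API: a writer publishes some
 * state and then sets the bit with release semantics; a reader that observes
 * the bit with acquire semantics may rely on that state::
 *
 *	// writer
 *	publish_state();		// hypothetical helper
 *	pt_set_sw_bit_release(pts, bitnr);
 *
 *	// reader
 *	if (pt_test_sw_bit_acquire(pts, bitnr))
 *		consume_state();	// hypothetical helper
 */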

/**
 * pt_load_entry() - Read from the location pts points at into the pts
 * @pts: Table index to load
 *
 * Set the type of entry that was loaded. pts->entry and pts->table_lower
 * will be filled in with the entry's content.
 */
static inline void pt_load_entry(struct pt_state *pts)
{
	pts->type = pt_load_entry_raw(pts);
	if (pts->type == PT_ENTRY_TABLE)
		pts->table_lower = pt_table_ptr(pts);
}
#endif

@@ -0,0 +1,332 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 *
 * This header is included before the format. It contains definitions
 * that are required to compile the format. The header order is:
 *   pt_defs.h
 *   fmt_XX.h
 *   pt_common.h
 */
#ifndef __GENERIC_PT_DEFS_H
#define __GENERIC_PT_DEFS_H

#include <linux/generic_pt/common.h>

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/bits.h>
#include <linux/limits.h>
#include <linux/bug.h>
#include <linux/kconfig.h>
#include "pt_log2.h"

/* Header self-compile default defines */
#ifndef pt_write_attrs
typedef u64 pt_vaddr_t;
typedef u64 pt_oaddr_t;
#endif

struct pt_table_p;

enum {
	PT_VADDR_MAX = sizeof(pt_vaddr_t) == 8 ? U64_MAX : U32_MAX,
	PT_VADDR_MAX_LG2 = sizeof(pt_vaddr_t) == 8 ? 64 : 32,
	PT_OADDR_MAX = sizeof(pt_oaddr_t) == 8 ? U64_MAX : U32_MAX,
	PT_OADDR_MAX_LG2 = sizeof(pt_oaddr_t) == 8 ? 64 : 32,
};

/*
 * The format instantiation can have features wired off or on to optimize the
 * code gen. Supported features are just a reflection of what the current set of
 * kernel users want to use.
 */
#ifndef PT_SUPPORTED_FEATURES
#define PT_SUPPORTED_FEATURES 0
#endif

/*
 * When in debug mode we compile all formats with all features. This allows the
 * kunit to test the full matrix. SIGN_EXTEND can't co-exist with DYNAMIC_TOP or
 * FULL_VA. DMA_INCOHERENT requires a SW bit that not all formats have.
 */
#if IS_ENABLED(CONFIG_DEBUG_GENERIC_PT)
enum {
	PT_ORIG_SUPPORTED_FEATURES = PT_SUPPORTED_FEATURES,
	PT_DEBUG_SUPPORTED_FEATURES =
		UINT_MAX &
		~((PT_ORIG_SUPPORTED_FEATURES & BIT(PT_FEAT_DMA_INCOHERENT) ?
			   0 :
			   BIT(PT_FEAT_DMA_INCOHERENT))) &
		~((PT_ORIG_SUPPORTED_FEATURES & BIT(PT_FEAT_SIGN_EXTEND)) ?
			  BIT(PT_FEAT_DYNAMIC_TOP) | BIT(PT_FEAT_FULL_VA) :
			  BIT(PT_FEAT_SIGN_EXTEND)),
};
#undef PT_SUPPORTED_FEATURES
#define PT_SUPPORTED_FEATURES PT_DEBUG_SUPPORTED_FEATURES
#endif

#ifndef PT_FORCE_ENABLED_FEATURES
#define PT_FORCE_ENABLED_FEATURES 0
#endif

/**
 * DOC: Generic Page Table Language
 *
 * Language used in Generic Page Table:
 *
 * VA
 *   The input address to the page table, often the virtual address.
 * OA
 *   The output address from the page table, often the physical address.
 * leaf
 *   An entry that results in an output address.
 * start/end
 *   A half-open range, e.g. [0,0) refers to no VA.
 * start/last
 *   An inclusive closed range, e.g. [0,0] refers to the VA 0.
 * common
 *   The generic page table container struct pt_common.
 * level
 *   Level 0 is always a table of only leaves with no further table pointers.
 *   Increasing levels increase the size of the table items. The least
 *   significant VA bits used to index page tables are used to index the Level
 *   0 table. The various labels for table levels used by HW descriptions are
 *   not used.
 * top_level
 *   The inclusive highest level of the table. A two-level table
 *   has a top level of 1.
 * table
 *   A linear array of translation items for that level.
 * index
 *   The position in a table of an element: item = table[index]
 * item
 *   A single index in a table.
 * entry
 *   A single logical element in a table. If contiguous pages are not
 *   supported then item and entry are the same thing, otherwise entry refers
 *   to all the items that comprise a single contiguous translation.
 * item/entry_size
 *   The number of bytes of VA the table index translates for.
 *   If the item is a table entry then the next table covers
 *   this size. If the entry translates to an output address then the
 *   full OA is: OA | (VA % entry_size)
 * contig_count
 *   The number of consecutive items fused into a single entry.
 *   item_size * contig_count is the size of that entry's translation.
 * lg2
 *   Indicates the value is encoded as log2, i.e. 1<<x is the actual value.
 *   Normally the compiler is fine to optimize divide and mod with log2 values
 *   automatically when inlining, however if the values are not constant
 *   expressions it can't. So we do it by hand; we want to avoid 64-bit
 *   divmod.
 */
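/*
 * Worked example of the terms above, with made-up numbers: a two-level table
 * (top_level == 1) whose tables hold 512 items of 4KiB at level 0 has::
 *
 *	level 0 item_size = 4KiB  (lg2 == 12)
 *	level 1 item_size = 2MiB  (lg2 == 21)
 *	contig_count 16 at level 0 -> one 64KiB entry made of 16 items
 *
 * Nothing here is specific to any HW format; each format plugs in its own
 * constants.
 */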

/* Returned by pt_load_entry() and for_each_pt_level_entry() */
enum pt_entry_type {
	PT_ENTRY_EMPTY,
	/* Entry is valid and points to a lower table level */
	PT_ENTRY_TABLE,
	/* Entry is valid and returns an output address */
	PT_ENTRY_OA,
};

struct pt_range {
	struct pt_common *common;
	struct pt_table_p *top_table;
	pt_vaddr_t va;
	pt_vaddr_t last_va;
	u8 top_level;
	u8 max_vasz_lg2;
};

/*
 * Similar to xa_state, this records information about an in-progress parse at a
 * single level.
 */
struct pt_state {
	struct pt_range *range;
	struct pt_table_p *table;
	struct pt_table_p *table_lower;
	u64 entry;
	enum pt_entry_type type;
	unsigned short index;
	unsigned short end_index;
	u8 level;
};

#define pt_cur_table(pts, type) ((type *)((pts)->table))

/*
 * Try to install a new table pointer. The locking methodology requires this to
 * be atomic (multiple threads can race to install a pointer). The losing
 * threads will fail the atomic and return false. They should free any memory
 * and reparse the table level again.
 */
#if !IS_ENABLED(CONFIG_GENERIC_ATOMIC64)
static inline bool pt_table_install64(struct pt_state *pts, u64 table_entry)
{
	u64 *entryp = pt_cur_table(pts, u64) + pts->index;
	u64 old_entry = pts->entry;
	bool ret;

	/*
	 * Ensure the zero'd table content itself is visible before its PTE can
	 * be. release is a NOP on !SMP, but the HW is still doing an acquire.
	 */
	if (!IS_ENABLED(CONFIG_SMP))
		dma_wmb();
	ret = try_cmpxchg64_release(entryp, &old_entry, table_entry);
	if (ret)
		pts->entry = table_entry;
	return ret;
}
#endif

static inline bool pt_table_install32(struct pt_state *pts, u32 table_entry)
{
	u32 *entryp = pt_cur_table(pts, u32) + pts->index;
	u32 old_entry = pts->entry;
	bool ret;

	/*
	 * Ensure the zero'd table content itself is visible before its PTE can
	 * be. release is a NOP on !SMP, but the HW is still doing an acquire.
	 */
	if (!IS_ENABLED(CONFIG_SMP))
		dma_wmb();
	ret = try_cmpxchg_release(entryp, &old_entry, table_entry);
	if (ret)
		pts->entry = table_entry;
	return ret;
}

#define PT_SUPPORTED_FEATURE(feature_nr) (PT_SUPPORTED_FEATURES & BIT(feature_nr))

static inline bool pt_feature(const struct pt_common *common,
			      unsigned int feature_nr)
{
	if (PT_FORCE_ENABLED_FEATURES & BIT(feature_nr))
		return true;
	if (!PT_SUPPORTED_FEATURE(feature_nr))
		return false;
	return common->features & BIT(feature_nr);
}

static inline bool pts_feature(const struct pt_state *pts,
			       unsigned int feature_nr)
{
	return pt_feature(pts->range->common, feature_nr);
}

/*
 * PT_WARN_ON is used for invariants that the kunit should be checking can't
 * happen.
 */
#if IS_ENABLED(CONFIG_DEBUG_GENERIC_PT)
#define PT_WARN_ON WARN_ON
#else
static inline bool PT_WARN_ON(bool condition)
{
	return false;
}
#endif

/* These all work on the VA type */
#define log2_to_int(a_lg2) log2_to_int_t(pt_vaddr_t, a_lg2)
#define log2_to_max_int(a_lg2) log2_to_max_int_t(pt_vaddr_t, a_lg2)
#define log2_div(a, b_lg2) log2_div_t(pt_vaddr_t, a, b_lg2)
#define log2_div_eq(a, b, c_lg2) log2_div_eq_t(pt_vaddr_t, a, b, c_lg2)
#define log2_mod(a, b_lg2) log2_mod_t(pt_vaddr_t, a, b_lg2)
#define log2_mod_eq_max(a, b_lg2) log2_mod_eq_max_t(pt_vaddr_t, a, b_lg2)
#define log2_set_mod(a, val, b_lg2) log2_set_mod_t(pt_vaddr_t, a, val, b_lg2)
#define log2_set_mod_max(a, b_lg2) log2_set_mod_max_t(pt_vaddr_t, a, b_lg2)
#define log2_mul(a, b_lg2) log2_mul_t(pt_vaddr_t, a, b_lg2)
#define vaffs(a) ffs_t(pt_vaddr_t, a)
#define vafls(a) fls_t(pt_vaddr_t, a)
#define vaffz(a) ffz_t(pt_vaddr_t, a)

/*
 * The full VA (fva) versions permit the lg2 value to be == PT_VADDR_MAX_LG2 and
 * generate a useful defined result. The non-fva versions will malfunction at
 * this extreme.
 */
static inline pt_vaddr_t fvalog2_div(pt_vaddr_t a, unsigned int b_lg2)
{
	if (PT_SUPPORTED_FEATURE(PT_FEAT_FULL_VA) && b_lg2 == PT_VADDR_MAX_LG2)
		return 0;
	return log2_div_t(pt_vaddr_t, a, b_lg2);
}

static inline pt_vaddr_t fvalog2_mod(pt_vaddr_t a, unsigned int b_lg2)
{
	if (PT_SUPPORTED_FEATURE(PT_FEAT_FULL_VA) && b_lg2 == PT_VADDR_MAX_LG2)
		return a;
	return log2_mod_t(pt_vaddr_t, a, b_lg2);
}

static inline bool fvalog2_div_eq(pt_vaddr_t a, pt_vaddr_t b,
				  unsigned int c_lg2)
{
	if (PT_SUPPORTED_FEATURE(PT_FEAT_FULL_VA) && c_lg2 == PT_VADDR_MAX_LG2)
		return true;
	return log2_div_eq_t(pt_vaddr_t, a, b, c_lg2);
}

static inline pt_vaddr_t fvalog2_set_mod(pt_vaddr_t a, pt_vaddr_t val,
					 unsigned int b_lg2)
{
	if (PT_SUPPORTED_FEATURE(PT_FEAT_FULL_VA) && b_lg2 == PT_VADDR_MAX_LG2)
		return val;
	return log2_set_mod_t(pt_vaddr_t, a, val, b_lg2);
}

static inline pt_vaddr_t fvalog2_set_mod_max(pt_vaddr_t a, unsigned int b_lg2)
{
	if (PT_SUPPORTED_FEATURE(PT_FEAT_FULL_VA) && b_lg2 == PT_VADDR_MAX_LG2)
		return PT_VADDR_MAX;
	return log2_set_mod_max_t(pt_vaddr_t, a, b_lg2);
}
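/*
 * Example of why the fva variants exist, for illustration: with a 64 bit
 * pt_vaddr_t, log2_div(va, 64) would end up shifting by the full type width,
 * which C leaves undefined, while fvalog2_div(va, PT_VADDR_MAX_LG2) is
 * defined and returns 0. Callers that can see a full-width lg2 use the fva
 * versions; the plain versions stay cheaper for the common case.
 */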

/* These all work on the OA type */
#define oalog2_to_int(a_lg2) log2_to_int_t(pt_oaddr_t, a_lg2)
#define oalog2_to_max_int(a_lg2) log2_to_max_int_t(pt_oaddr_t, a_lg2)
#define oalog2_div(a, b_lg2) log2_div_t(pt_oaddr_t, a, b_lg2)
#define oalog2_div_eq(a, b, c_lg2) log2_div_eq_t(pt_oaddr_t, a, b, c_lg2)
#define oalog2_mod(a, b_lg2) log2_mod_t(pt_oaddr_t, a, b_lg2)
#define oalog2_mod_eq_max(a, b_lg2) log2_mod_eq_max_t(pt_oaddr_t, a, b_lg2)
#define oalog2_set_mod(a, val, b_lg2) log2_set_mod_t(pt_oaddr_t, a, val, b_lg2)
#define oalog2_set_mod_max(a, b_lg2) log2_set_mod_max_t(pt_oaddr_t, a, b_lg2)
#define oalog2_mul(a, b_lg2) log2_mul_t(pt_oaddr_t, a, b_lg2)
#define oaffs(a) ffs_t(pt_oaddr_t, a)
#define oafls(a) fls_t(pt_oaddr_t, a)
#define oaffz(a) ffz_t(pt_oaddr_t, a)

static inline uintptr_t _pt_top_set(struct pt_table_p *table_mem,
				    unsigned int top_level)
{
	return top_level | (uintptr_t)table_mem;
}

static inline void pt_top_set(struct pt_common *common,
			      struct pt_table_p *table_mem,
			      unsigned int top_level)
{
	WRITE_ONCE(common->top_of_table, _pt_top_set(table_mem, top_level));
}

static inline void pt_top_set_level(struct pt_common *common,
				    unsigned int top_level)
{
	pt_top_set(common, NULL, top_level);
}

static inline unsigned int pt_top_get_level(const struct pt_common *common)
{
	return READ_ONCE(common->top_of_table) % (1 << PT_TOP_LEVEL_BITS);
}
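/*
 * Illustration of the encoding used above, not an API: the level lives in the
 * low PT_TOP_LEVEL_BITS of top_of_table and the rest is the table pointer,
 * which works because the table allocation is aligned past those bits. A
 * reader would conceptually split it as::
 *
 *	uintptr_t top = READ_ONCE(common->top_of_table);
 *	unsigned int level = top % (1 << PT_TOP_LEVEL_BITS);
 *	struct pt_table_p *table = (struct pt_table_p *)(top - level);
 *
 * The 'table' line is an inference; the real decoding helper is not shown in
 * this header.
 */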

static inline bool pt_check_install_leaf_args(struct pt_state *pts,
					      pt_oaddr_t oa,
					      unsigned int oasz_lg2);

#endif

@@ -0,0 +1,295 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 *
 * Default definitions for formats that don't define these functions.
 */
#ifndef __GENERIC_PT_PT_FMT_DEFAULTS_H
#define __GENERIC_PT_PT_FMT_DEFAULTS_H

#include "pt_defs.h"
#include <linux/log2.h>

/* Header self-compile default defines */
#ifndef pt_load_entry_raw
#include "fmt/amdv1.h"
#endif

/*
 * The format must provide PT_GRANULE_LG2SZ, PT_TABLEMEM_LG2SZ, and
 * PT_ITEM_WORD_SIZE. They must be the same at every level excluding the top.
 */
#ifndef pt_table_item_lg2sz
static inline unsigned int pt_table_item_lg2sz(const struct pt_state *pts)
{
	return PT_GRANULE_LG2SZ +
	       (PT_TABLEMEM_LG2SZ - ilog2(PT_ITEM_WORD_SIZE)) * pts->level;
}
#endif
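/*
 * Illustrative numbers only: with PT_GRANULE_LG2SZ == 12 (4KiB granule),
 * PT_TABLEMEM_LG2SZ == 12 (4KiB table memory) and PT_ITEM_WORD_SIZE == 8,
 * each level decodes 12 - 3 = 9 VA bits, so the default above yields item
 * sizes of 4KiB, 2MiB, 1GiB, ... at levels 0, 1, 2, ... Formats with other
 * geometries get a different ladder.
 */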

#ifndef pt_pgsz_lg2_to_level
static inline unsigned int pt_pgsz_lg2_to_level(struct pt_common *common,
						unsigned int pgsize_lg2)
{
	return ((unsigned int)(pgsize_lg2 - PT_GRANULE_LG2SZ)) /
	       (PT_TABLEMEM_LG2SZ - ilog2(PT_ITEM_WORD_SIZE));
}
#endif

/*
 * If not supplied by the format then contiguous pages are not supported.
 *
 * If contiguous pages are supported then the format must also provide
 * pt_contig_count_lg2() if it supports a single contiguous size per level,
 * or pt_possible_sizes() if it supports multiple sizes per level.
 */
#ifndef pt_entry_num_contig_lg2
static inline unsigned int pt_entry_num_contig_lg2(const struct pt_state *pts)
{
	return ilog2(1);
}

/*
 * Return the number of contiguous OA items forming an entry at this table level
 */
static inline unsigned short pt_contig_count_lg2(const struct pt_state *pts)
{
	return ilog2(1);
}
#endif

/* If not supplied by the format then dirty tracking is not supported */
#ifndef pt_entry_is_write_dirty
static inline bool pt_entry_is_write_dirty(const struct pt_state *pts)
{
	return false;
}

static inline void pt_entry_make_write_clean(struct pt_state *pts)
{
}

static inline bool pt_dirty_supported(struct pt_common *common)
{
	return false;
}
#else
/* If not supplied then dirty tracking is always enabled */
#ifndef pt_dirty_supported
static inline bool pt_dirty_supported(struct pt_common *common)
{
	return true;
}
#endif
#endif

#ifndef pt_entry_make_write_dirty
static inline bool pt_entry_make_write_dirty(struct pt_state *pts)
{
	return false;
}
#endif

/*
 * Format supplies either:
 *   pt_entry_oa - OA is at the start of a contiguous entry
 * or
 *   pt_item_oa - OA is adjusted for every item in a contiguous entry
 *
 * Build the missing one.
 *
 * The internal helper _pt_entry_oa_fast() allows generating an efficient
 * pt_entry_oa_exact(); it doesn't care which option is selected.
 */
#ifdef pt_entry_oa
static inline pt_oaddr_t pt_item_oa(const struct pt_state *pts)
{
	return pt_entry_oa(pts) |
	       log2_mul(pts->index, pt_table_item_lg2sz(pts));
}
#define _pt_entry_oa_fast pt_entry_oa
#endif

#ifdef pt_item_oa
static inline pt_oaddr_t pt_entry_oa(const struct pt_state *pts)
{
	return log2_set_mod(pt_item_oa(pts), 0,
			    pt_entry_num_contig_lg2(pts) +
				    pt_table_item_lg2sz(pts));
}
#define _pt_entry_oa_fast pt_item_oa
#endif

/*
 * If not supplied by the format then use the constant
 * PT_MAX_OUTPUT_ADDRESS_LG2.
 */
#ifndef pt_max_oa_lg2
static inline unsigned int
pt_max_oa_lg2(const struct pt_common *common)
{
	return PT_MAX_OUTPUT_ADDRESS_LG2;
}
#endif

#ifndef pt_has_system_page_size
static inline bool pt_has_system_page_size(const struct pt_common *common)
{
	return PT_GRANULE_LG2SZ == PAGE_SHIFT;
}
#endif

/*
 * If not supplied by the format then assume only one contiguous size determined
 * by pt_contig_count_lg2()
 */
#ifndef pt_possible_sizes
static inline unsigned short pt_contig_count_lg2(const struct pt_state *pts);

/* Return a bitmap of possible leaf page sizes at this level */
static inline pt_vaddr_t pt_possible_sizes(const struct pt_state *pts)
{
	unsigned int isz_lg2 = pt_table_item_lg2sz(pts);

	if (!pt_can_have_leaf(pts))
		return 0;
	return log2_to_int(isz_lg2) |
	       log2_to_int(pt_contig_count_lg2(pts) + isz_lg2);
}
#endif
|
||||
|
||||
/* If not supplied by the format then use 0. */
|
||||
#ifndef pt_full_va_prefix
|
||||
static inline pt_vaddr_t pt_full_va_prefix(const struct pt_common *common)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* If not supplied by the format then zero fill using PT_ITEM_WORD_SIZE */
|
||||
#ifndef pt_clear_entries
|
||||
static inline void pt_clear_entries64(struct pt_state *pts,
|
||||
unsigned int num_contig_lg2)
|
||||
{
|
||||
u64 *tablep = pt_cur_table(pts, u64) + pts->index;
|
||||
u64 *end = tablep + log2_to_int(num_contig_lg2);
|
||||
|
||||
PT_WARN_ON(log2_mod(pts->index, num_contig_lg2));
|
||||
for (; tablep != end; tablep++)
|
||||
WRITE_ONCE(*tablep, 0);
|
||||
}
|
||||
|
||||
static inline void pt_clear_entries32(struct pt_state *pts,
|
||||
unsigned int num_contig_lg2)
|
||||
{
|
||||
u32 *tablep = pt_cur_table(pts, u32) + pts->index;
|
||||
u32 *end = tablep + log2_to_int(num_contig_lg2);
|
||||
|
||||
PT_WARN_ON(log2_mod(pts->index, num_contig_lg2));
|
||||
for (; tablep != end; tablep++)
|
||||
WRITE_ONCE(*tablep, 0);
|
||||
}
|
||||
|
||||
static inline void pt_clear_entries(struct pt_state *pts,
|
||||
unsigned int num_contig_lg2)
|
||||
{
|
||||
if (PT_ITEM_WORD_SIZE == sizeof(u32))
|
||||
pt_clear_entries32(pts, num_contig_lg2);
|
||||
else
|
||||
pt_clear_entries64(pts, num_contig_lg2);
|
||||
}
|
||||
#define pt_clear_entries pt_clear_entries
|
||||
#endif
|
||||
|
||||
/* If not supplied then SW bits are not supported */
|
||||
#ifdef pt_sw_bit
|
||||
static inline bool pt_test_sw_bit_acquire(struct pt_state *pts,
|
||||
unsigned int bitnr)
|
||||
{
|
||||
/* Acquire, pairs with pt_set_sw_bit_release() */
|
||||
smp_mb();
|
||||
/* For a contiguous entry the sw bit is only stored in the first item. */
|
||||
return pts->entry & pt_sw_bit(bitnr);
|
||||
}
|
||||
#define pt_test_sw_bit_acquire pt_test_sw_bit_acquire
|
||||
|
||||
static inline void pt_set_sw_bit_release(struct pt_state *pts,
|
||||
unsigned int bitnr)
|
||||
{
|
||||
#if !IS_ENABLED(CONFIG_GENERIC_ATOMIC64)
|
||||
if (PT_ITEM_WORD_SIZE == sizeof(u64)) {
|
||||
u64 *entryp = pt_cur_table(pts, u64) + pts->index;
|
||||
u64 old_entry = pts->entry;
|
||||
u64 new_entry;
|
||||
|
||||
do {
|
||||
new_entry = old_entry | pt_sw_bit(bitnr);
|
||||
} while (!try_cmpxchg64_release(entryp, &old_entry, new_entry));
|
||||
pts->entry = new_entry;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (PT_ITEM_WORD_SIZE == sizeof(u32)) {
|
||||
u32 *entryp = pt_cur_table(pts, u32) + pts->index;
|
||||
u32 old_entry = pts->entry;
|
||||
u32 new_entry;
|
||||
|
||||
do {
|
||||
new_entry = old_entry | pt_sw_bit(bitnr);
|
||||
} while (!try_cmpxchg_release(entryp, &old_entry, new_entry));
|
||||
pts->entry = new_entry;
|
||||
} else
|
||||
BUILD_BUG();
|
||||
}
|
||||
#define pt_set_sw_bit_release pt_set_sw_bit_release
|
||||
#else
|
||||
static inline unsigned int pt_max_sw_bit(struct pt_common *common)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern void __pt_no_sw_bit(void);
|
||||
static inline bool pt_test_sw_bit_acquire(struct pt_state *pts,
|
||||
unsigned int bitnr)
|
||||
{
|
||||
__pt_no_sw_bit();
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void pt_set_sw_bit_release(struct pt_state *pts,
|
||||
unsigned int bitnr)
|
||||
{
|
||||
__pt_no_sw_bit();
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
 * The format can call this in pt_install_leaf_entry() to check that the arguments are
|
||||
 * all aligned correctly.
|
||||
*/
|
||||
static inline bool pt_check_install_leaf_args(struct pt_state *pts,
|
||||
pt_oaddr_t oa,
|
||||
unsigned int oasz_lg2)
|
||||
{
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
|
||||
|
||||
if (PT_WARN_ON(oalog2_mod(oa, oasz_lg2)))
|
||||
return false;
|
||||
|
||||
#ifdef pt_possible_sizes
|
||||
if (PT_WARN_ON(isz_lg2 > oasz_lg2 ||
|
||||
oasz_lg2 > isz_lg2 + pt_num_items_lg2(pts)))
|
||||
return false;
|
||||
#else
|
||||
if (PT_WARN_ON(oasz_lg2 != isz_lg2 &&
|
||||
oasz_lg2 != isz_lg2 + pt_contig_count_lg2(pts)))
|
||||
return false;
|
||||
#endif
|
||||
|
||||
if (PT_WARN_ON(oalog2_mod(pts->index, oasz_lg2 - isz_lg2)))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
@@ -0,0 +1,636 @@
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
* Iterators for Generic Page Table
|
||||
*/
|
||||
#ifndef __GENERIC_PT_PT_ITER_H
|
||||
#define __GENERIC_PT_PT_ITER_H
|
||||
|
||||
#include "pt_common.h"
|
||||
|
||||
#include <linux/errno.h>
|
||||
|
||||
/*
|
||||
* Use to mangle symbols so that backtraces and the symbol table are
|
||||
* understandable. Any non-inlined function should get mangled like this.
|
||||
*/
|
||||
#define NS(fn) CONCATENATE(PTPFX, fn)
|
||||
|
||||
/**
|
||||
* pt_check_range() - Validate the range can be iterated
|
||||
* @range: Range to validate
|
||||
*
|
||||
* Check that VA and last_va fall within the permitted range of VAs. If the
|
||||
* format is using PT_FEAT_SIGN_EXTEND then this also checks the sign extension
|
||||
* is correct.
|
||||
*/
|
||||
static inline int pt_check_range(struct pt_range *range)
|
||||
{
|
||||
pt_vaddr_t prefix;
|
||||
|
||||
PT_WARN_ON(!range->max_vasz_lg2);
|
||||
|
||||
if (pt_feature(range->common, PT_FEAT_SIGN_EXTEND)) {
|
||||
PT_WARN_ON(range->common->max_vasz_lg2 != range->max_vasz_lg2);
|
||||
prefix = fvalog2_div(range->va, range->max_vasz_lg2 - 1) ?
|
||||
PT_VADDR_MAX :
|
||||
0;
|
||||
} else {
|
||||
prefix = pt_full_va_prefix(range->common);
|
||||
}
|
||||
|
||||
if (!fvalog2_div_eq(range->va, prefix, range->max_vasz_lg2) ||
|
||||
!fvalog2_div_eq(range->last_va, prefix, range->max_vasz_lg2))
|
||||
return -ERANGE;
|
||||
return 0;
|
||||
}
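/*
 * Example of the PT_FEAT_SIGN_EXTEND rule above, assuming a 48-bit VA space
 * (max_vasz_lg2 == 48): bit 47 selects the half and every higher bit must
 * repeat it, so 0x00007ffffffff000 and 0xffff800000000000 pass while
 * 0x0001000000000000 is rejected with -ERANGE.
 */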
|
||||
|
||||
/**
|
||||
* pt_index_to_va() - Update range->va to the current pts->index
|
||||
* @pts: Iteration State
|
||||
*
|
||||
* Adjust range->va to match the current index. This is done in a lazy manner
|
||||
* since computing the VA takes several instructions and is rarely required.
|
||||
*/
|
||||
static inline void pt_index_to_va(struct pt_state *pts)
|
||||
{
|
||||
pt_vaddr_t lower_va;
|
||||
|
||||
lower_va = log2_mul(pts->index, pt_table_item_lg2sz(pts));
|
||||
pts->range->va = fvalog2_set_mod(pts->range->va, lower_va,
|
||||
pt_table_oa_lg2sz(pts));
|
||||
}
|
||||
|
||||
/*
|
||||
* Add index_count_lg2 number of entries to pts's VA and index. The VA will be
|
||||
* adjusted to the end of the contiguous block if it is currently in the middle.
|
||||
*/
|
||||
static inline void _pt_advance(struct pt_state *pts,
|
||||
unsigned int index_count_lg2)
|
||||
{
|
||||
pts->index = log2_set_mod(pts->index + log2_to_int(index_count_lg2), 0,
|
||||
index_count_lg2);
|
||||
}
|
||||
|
||||
/**
|
||||
* pt_entry_fully_covered() - Check if the item or entry is entirely contained
|
||||
* within pts->range
|
||||
* @pts: Iteration State
|
||||
* @oasz_lg2: The size of the item to check, pt_table_item_lg2sz() or
|
||||
* pt_entry_oa_lg2sz()
|
||||
*
|
||||
* Returns: true if the item is fully enclosed by the pts->range.
|
||||
*/
|
||||
static inline bool pt_entry_fully_covered(const struct pt_state *pts,
|
||||
unsigned int oasz_lg2)
|
||||
{
|
||||
struct pt_range *range = pts->range;
|
||||
|
||||
/* Range begins at the start of the entry */
|
||||
if (log2_mod(pts->range->va, oasz_lg2))
|
||||
return false;
|
||||
|
||||
/* Range ends past the end of the entry */
|
||||
if (!log2_div_eq(range->va, range->last_va, oasz_lg2))
|
||||
return true;
|
||||
|
||||
/* Range ends at the end of the entry */
|
||||
return log2_mod_eq_max(range->last_va, oasz_lg2);
|
||||
}
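/*
 * Example (addresses assumed for illustration): with oasz_lg2 == 21, i.e. a
 * 2 MiB entry, the range [0x200000, 0x5fffff] fully covers the entry at
 * 0x200000 (aligned start, ends in a later entry), while [0x200000, 0x2fffff]
 * stops in the middle of it and does not.
 */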
|
||||
|
||||
/**
|
||||
* pt_range_to_index() - Starting index for an iteration
|
||||
* @pts: Iteration State
|
||||
*
|
||||
* Return: the starting index for the iteration in pts.
|
||||
*/
|
||||
static inline unsigned int pt_range_to_index(const struct pt_state *pts)
|
||||
{
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
|
||||
|
||||
PT_WARN_ON(pts->level > pts->range->top_level);
|
||||
if (pts->range->top_level == pts->level)
|
||||
return log2_div(fvalog2_mod(pts->range->va,
|
||||
pts->range->max_vasz_lg2),
|
||||
isz_lg2);
|
||||
return log2_mod(log2_div(pts->range->va, isz_lg2),
|
||||
pt_num_items_lg2(pts));
|
||||
}
|
||||
|
||||
/**
|
||||
 * pt_range_to_end_index() - Ending index for an iteration
|
||||
* @pts: Iteration State
|
||||
*
|
||||
* Return: the last index for the iteration in pts.
|
||||
*/
|
||||
static inline unsigned int pt_range_to_end_index(const struct pt_state *pts)
|
||||
{
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
|
||||
struct pt_range *range = pts->range;
|
||||
unsigned int num_entries_lg2;
|
||||
|
||||
if (range->va == range->last_va)
|
||||
return pts->index + 1;
|
||||
|
||||
if (pts->range->top_level == pts->level)
|
||||
return log2_div(fvalog2_mod(pts->range->last_va,
|
||||
pts->range->max_vasz_lg2),
|
||||
isz_lg2) +
|
||||
1;
|
||||
|
||||
num_entries_lg2 = pt_num_items_lg2(pts);
|
||||
|
||||
/* last_va falls within this table */
|
||||
if (log2_div_eq(range->va, range->last_va, num_entries_lg2 + isz_lg2))
|
||||
return log2_mod(log2_div(pts->range->last_va, isz_lg2),
|
||||
num_entries_lg2) +
|
||||
1;
|
||||
|
||||
return log2_to_int(num_entries_lg2);
|
||||
}
|
||||
|
||||
static inline void _pt_iter_first(struct pt_state *pts)
|
||||
{
|
||||
pts->index = pt_range_to_index(pts);
|
||||
pts->end_index = pt_range_to_end_index(pts);
|
||||
PT_WARN_ON(pts->index > pts->end_index);
|
||||
}
|
||||
|
||||
static inline bool _pt_iter_load(struct pt_state *pts)
|
||||
{
|
||||
if (pts->index >= pts->end_index)
|
||||
return false;
|
||||
pt_load_entry(pts);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* pt_next_entry() - Advance pts to the next entry
|
||||
* @pts: Iteration State
|
||||
*
|
||||
* Update pts to go to the next index at this level. If pts is pointing at a
|
||||
 * contiguous entry then the index may advance by more than one.
|
||||
*/
|
||||
static inline void pt_next_entry(struct pt_state *pts)
|
||||
{
|
||||
if (pts->type == PT_ENTRY_OA &&
|
||||
!__builtin_constant_p(pt_entry_num_contig_lg2(pts) == 0))
|
||||
_pt_advance(pts, pt_entry_num_contig_lg2(pts));
|
||||
else
|
||||
pts->index++;
|
||||
pt_index_to_va(pts);
|
||||
}
|
||||
|
||||
/**
|
||||
* for_each_pt_level_entry() - For loop wrapper over entries in the range
|
||||
* @pts: Iteration State
|
||||
*
|
||||
* This is the basic iteration primitive. It iterates over all the entries in
|
||||
* pts->range that fall within the pts's current table level. Each step does
|
||||
* pt_load_entry(pts).
|
||||
*/
|
||||
#define for_each_pt_level_entry(pts) \
|
||||
for (_pt_iter_first(pts); _pt_iter_load(pts); pt_next_entry(pts))
|
||||
|
||||
/**
|
||||
* pt_load_single_entry() - Version of pt_load_entry() usable within a walker
|
||||
* @pts: Iteration State
|
||||
*
|
||||
* Alternative to for_each_pt_level_entry() if the walker function uses only a
|
||||
* single entry.
|
||||
*/
|
||||
static inline enum pt_entry_type pt_load_single_entry(struct pt_state *pts)
|
||||
{
|
||||
pts->index = pt_range_to_index(pts);
|
||||
pt_load_entry(pts);
|
||||
return pts->type;
|
||||
}
|
||||
|
||||
static __always_inline struct pt_range _pt_top_range(struct pt_common *common,
|
||||
uintptr_t top_of_table)
|
||||
{
|
||||
struct pt_range range = {
|
||||
.common = common,
|
||||
.top_table =
|
||||
(struct pt_table_p *)(top_of_table &
|
||||
~(uintptr_t)PT_TOP_LEVEL_MASK),
|
||||
.top_level = top_of_table % (1 << PT_TOP_LEVEL_BITS),
|
||||
};
|
||||
struct pt_state pts = { .range = &range, .level = range.top_level };
|
||||
unsigned int max_vasz_lg2;
|
||||
|
||||
max_vasz_lg2 = common->max_vasz_lg2;
|
||||
if (pt_feature(common, PT_FEAT_DYNAMIC_TOP) &&
|
||||
pts.level != PT_MAX_TOP_LEVEL)
|
||||
max_vasz_lg2 = min_t(unsigned int, common->max_vasz_lg2,
|
||||
pt_num_items_lg2(&pts) +
|
||||
pt_table_item_lg2sz(&pts));
|
||||
|
||||
/*
|
||||
* The top range will default to the lower region only with sign extend.
|
||||
*/
|
||||
range.max_vasz_lg2 = max_vasz_lg2;
|
||||
if (pt_feature(common, PT_FEAT_SIGN_EXTEND))
|
||||
max_vasz_lg2--;
|
||||
|
||||
range.va = fvalog2_set_mod(pt_full_va_prefix(common), 0, max_vasz_lg2);
|
||||
range.last_va =
|
||||
fvalog2_set_mod_max(pt_full_va_prefix(common), max_vasz_lg2);
|
||||
return range;
|
||||
}
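/*
 * Sketch of the top_of_table encoding consumed above: the level index lives
 * in the low PT_TOP_LEVEL_BITS of the otherwise aligned table pointer,
 * presumably packed by _pt_top_set() as something like
 *
 *	top_of_table = (uintptr_t)table | top_level;
 *
 * which is why a reader can capture both the pointer and the level with a
 * single untorn READ_ONCE() of common->top_of_table.
 */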
|
||||
|
||||
/**
|
||||
* pt_top_range() - Return a range that spans part of the top level
|
||||
* @common: Table
|
||||
*
|
||||
* For PT_FEAT_SIGN_EXTEND this will return the lower range, and cover half the
|
||||
* total page table. Otherwise it returns the entire page table.
|
||||
*/
|
||||
static __always_inline struct pt_range pt_top_range(struct pt_common *common)
|
||||
{
|
||||
/*
|
||||
* The top pointer can change without locking. We capture the value and
|
||||
 * its level here and are safe to walk it so long as both values are
|
||||
* captured without tearing.
|
||||
*/
|
||||
return _pt_top_range(common, READ_ONCE(common->top_of_table));
|
||||
}
|
||||
|
||||
/**
|
||||
* pt_all_range() - Return a range that spans the entire page table
|
||||
* @common: Table
|
||||
*
|
||||
* The returned range spans the whole page table. Due to how PT_FEAT_SIGN_EXTEND
|
||||
* is supported range->va and range->last_va will be incorrect during the
|
||||
* iteration and must not be accessed.
|
||||
*/
|
||||
static inline struct pt_range pt_all_range(struct pt_common *common)
|
||||
{
|
||||
struct pt_range range = pt_top_range(common);
|
||||
|
||||
if (!pt_feature(common, PT_FEAT_SIGN_EXTEND))
|
||||
return range;
|
||||
|
||||
/*
|
||||
* Pretend the table is linear from 0 without a sign extension. This
|
||||
* generates the correct indexes for iteration.
|
||||
*/
|
||||
range.last_va = fvalog2_set_mod_max(0, range.max_vasz_lg2);
|
||||
return range;
|
||||
}
|
||||
|
||||
/**
|
||||
* pt_upper_range() - Return a range that spans part of the top level
|
||||
* @common: Table
|
||||
*
|
||||
* For PT_FEAT_SIGN_EXTEND this will return the upper range, and cover half the
|
||||
* total page table. Otherwise it returns the entire page table.
|
||||
*/
|
||||
static inline struct pt_range pt_upper_range(struct pt_common *common)
|
||||
{
|
||||
struct pt_range range = pt_top_range(common);
|
||||
|
||||
if (!pt_feature(common, PT_FEAT_SIGN_EXTEND))
|
||||
return range;
|
||||
|
||||
range.va = fvalog2_set_mod(PT_VADDR_MAX, 0, range.max_vasz_lg2 - 1);
|
||||
range.last_va = PT_VADDR_MAX;
|
||||
return range;
|
||||
}
|
||||
|
||||
/**
|
||||
* pt_make_range() - Return a range that spans part of the table
|
||||
* @common: Table
|
||||
* @va: Start address
|
||||
* @last_va: Last address
|
||||
*
|
||||
* The caller must validate the range with pt_check_range() before using it.
|
||||
*/
|
||||
static __always_inline struct pt_range
|
||||
pt_make_range(struct pt_common *common, pt_vaddr_t va, pt_vaddr_t last_va)
|
||||
{
|
||||
struct pt_range range =
|
||||
_pt_top_range(common, READ_ONCE(common->top_of_table));
|
||||
|
||||
range.va = va;
|
||||
range.last_va = last_va;
|
||||
|
||||
return range;
|
||||
}
|
||||
|
||||
/*
|
||||
* Span a slice of the table starting at a lower table level from an active
|
||||
* walk.
|
||||
*/
|
||||
static __always_inline struct pt_range
|
||||
pt_make_child_range(const struct pt_range *parent, pt_vaddr_t va,
|
||||
pt_vaddr_t last_va)
|
||||
{
|
||||
struct pt_range range = *parent;
|
||||
|
||||
range.va = va;
|
||||
range.last_va = last_va;
|
||||
|
||||
PT_WARN_ON(last_va < va);
|
||||
PT_WARN_ON(pt_check_range(&range));
|
||||
|
||||
return range;
|
||||
}
|
||||
|
||||
/**
|
||||
* pt_init() - Initialize a pt_state on the stack
|
||||
* @range: Range pointer to embed in the state
|
||||
* @level: Table level for the state
|
||||
* @table: Pointer to the table memory at level
|
||||
*
|
||||
* Helper to initialize the on-stack pt_state from walker arguments.
|
||||
*/
|
||||
static __always_inline struct pt_state
|
||||
pt_init(struct pt_range *range, unsigned int level, struct pt_table_p *table)
|
||||
{
|
||||
struct pt_state pts = {
|
||||
.range = range,
|
||||
.table = table,
|
||||
.level = level,
|
||||
};
|
||||
return pts;
|
||||
}
|
||||
|
||||
/**
|
||||
* pt_init_top() - Initialize a pt_state on the stack
|
||||
* @range: Range pointer to embed in the state
|
||||
*
|
||||
* The pt_state points to the top most level.
|
||||
*/
|
||||
static __always_inline struct pt_state pt_init_top(struct pt_range *range)
|
||||
{
|
||||
return pt_init(range, range->top_level, range->top_table);
|
||||
}
|
||||
|
||||
typedef int (*pt_level_fn_t)(struct pt_range *range, void *arg,
|
||||
unsigned int level, struct pt_table_p *table);
|
||||
|
||||
/**
|
||||
* pt_descend() - Recursively invoke the walker for the lower level
|
||||
* @pts: Iteration State
|
||||
* @arg: Value to pass to the function
|
||||
* @fn: Walker function to call
|
||||
*
|
||||
* pts must point to a table item. Invoke fn as a walker on the table
|
||||
* pts points to.
|
||||
*/
|
||||
static __always_inline int pt_descend(struct pt_state *pts, void *arg,
|
||||
pt_level_fn_t fn)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (PT_WARN_ON(!pts->table_lower))
|
||||
return -EINVAL;
|
||||
|
||||
ret = (*fn)(pts->range, arg, pts->level - 1, pts->table_lower);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* pt_walk_range() - Walk over a VA range
|
||||
* @range: Range pointer
|
||||
* @fn: Walker function to call
|
||||
* @arg: Value to pass to the function
|
||||
*
|
||||
* Walk over a VA range. The caller should have done a validity check, at
|
||||
* least calling pt_check_range(), when building range. The walk will
|
||||
* start at the top most table.
|
||||
*/
|
||||
static __always_inline int pt_walk_range(struct pt_range *range,
|
||||
pt_level_fn_t fn, void *arg)
|
||||
{
|
||||
return fn(range, arg, range->top_level, range->top_table);
|
||||
}
|
||||
|
||||
/*
|
||||
* pt_walk_descend() - Recursively invoke the walker for a slice of a lower
|
||||
* level
|
||||
* @pts: Iteration State
|
||||
* @va: Start address
|
||||
* @last_va: Last address
|
||||
* @fn: Walker function to call
|
||||
* @arg: Value to pass to the function
|
||||
*
|
||||
 * With pts pointing at a table item this will descend and iterate over a
 * slice of the lower table. The caller must ensure that va/last_va are within
 * the table
|
||||
* item. This creates a new walk and does not alter pts or pts->range.
|
||||
*/
|
||||
static __always_inline int pt_walk_descend(const struct pt_state *pts,
|
||||
pt_vaddr_t va, pt_vaddr_t last_va,
|
||||
pt_level_fn_t fn, void *arg)
|
||||
{
|
||||
struct pt_range range = pt_make_child_range(pts->range, va, last_va);
|
||||
|
||||
if (PT_WARN_ON(!pt_can_have_table(pts)) ||
|
||||
PT_WARN_ON(!pts->table_lower))
|
||||
return -EINVAL;
|
||||
|
||||
return fn(&range, arg, pts->level - 1, pts->table_lower);
|
||||
}
|
||||
|
||||
/*
|
||||
* pt_walk_descend_all() - Recursively invoke the walker for a table item
|
||||
* @parent_pts: Iteration State
|
||||
* @fn: Walker function to call
|
||||
* @arg: Value to pass to the function
|
||||
*
|
||||
 * With parent_pts pointing at a table item this will descend and iterate over
 * the entire lower table. This creates a new walk and does not alter
 * parent_pts or parent_pts->range.
|
||||
*/
|
||||
static __always_inline int
|
||||
pt_walk_descend_all(const struct pt_state *parent_pts, pt_level_fn_t fn,
|
||||
void *arg)
|
||||
{
|
||||
unsigned int isz_lg2 = pt_table_item_lg2sz(parent_pts);
|
||||
|
||||
return pt_walk_descend(parent_pts,
|
||||
log2_set_mod(parent_pts->range->va, 0, isz_lg2),
|
||||
log2_set_mod_max(parent_pts->range->va, isz_lg2),
|
||||
fn, arg);
|
||||
}
|
||||
|
||||
/**
|
||||
* pt_range_slice() - Return a range that spans indexes
|
||||
* @pts: Iteration State
|
||||
* @start_index: Starting index within pts
|
||||
* @end_index: Ending index within pts
|
||||
*
|
||||
 * Create a range that spans an index range of the current table level
|
||||
* pt_state points at.
|
||||
*/
|
||||
static inline struct pt_range pt_range_slice(const struct pt_state *pts,
|
||||
unsigned int start_index,
|
||||
unsigned int end_index)
|
||||
{
|
||||
unsigned int table_lg2sz = pt_table_oa_lg2sz(pts);
|
||||
pt_vaddr_t last_va;
|
||||
pt_vaddr_t va;
|
||||
|
||||
va = fvalog2_set_mod(pts->range->va,
|
||||
log2_mul(start_index, pt_table_item_lg2sz(pts)),
|
||||
table_lg2sz);
|
||||
last_va = fvalog2_set_mod(
|
||||
pts->range->va,
|
||||
log2_mul(end_index, pt_table_item_lg2sz(pts)) - 1, table_lg2sz);
|
||||
return pt_make_child_range(pts->range, va, last_va);
|
||||
}
|
||||
|
||||
/**
|
||||
 * pt_top_memsize_lg2() - Compute the allocation size of the top table
|
||||
* @common: Table
|
||||
* @top_of_table: Top of table value from _pt_top_set()
|
||||
*
|
||||
* Compute the allocation size of the top table. For PT_FEAT_DYNAMIC_TOP this
|
||||
* will compute the top size assuming the table will grow.
|
||||
*/
|
||||
static inline unsigned int pt_top_memsize_lg2(struct pt_common *common,
|
||||
uintptr_t top_of_table)
|
||||
{
|
||||
struct pt_range range = _pt_top_range(common, top_of_table);
|
||||
struct pt_state pts = pt_init_top(&range);
|
||||
unsigned int num_items_lg2;
|
||||
|
||||
num_items_lg2 = common->max_vasz_lg2 - pt_table_item_lg2sz(&pts);
|
||||
if (range.top_level != PT_MAX_TOP_LEVEL &&
|
||||
pt_feature(common, PT_FEAT_DYNAMIC_TOP))
|
||||
num_items_lg2 = min(num_items_lg2, pt_num_items_lg2(&pts));
|
||||
|
||||
/* Round up the allocation size to the minimum alignment */
|
||||
return max(ffs_t(u64, PT_TOP_PHYS_MASK),
|
||||
num_items_lg2 + ilog2(PT_ITEM_WORD_SIZE));
|
||||
}
|
||||
|
||||
/**
|
||||
* pt_compute_best_pgsize() - Determine the best page size for leaf entries
|
||||
* @pgsz_bitmap: Permitted page sizes
|
||||
* @va: Starting virtual address for the leaf entry
|
||||
* @last_va: Last virtual address for the leaf entry, sets the max page size
|
||||
* @oa: Starting output address for the leaf entry
|
||||
*
|
||||
* Compute the largest page size for va, last_va, and oa together and return it
|
||||
* in lg2. The largest page size depends on the format's supported page sizes at
|
||||
* this level, and the relative alignment of the VA and OA addresses. 0 means
|
||||
* the OA cannot be stored with the provided pgsz_bitmap.
|
||||
*/
|
||||
static inline unsigned int pt_compute_best_pgsize(pt_vaddr_t pgsz_bitmap,
|
||||
pt_vaddr_t va,
|
||||
pt_vaddr_t last_va,
|
||||
pt_oaddr_t oa)
|
||||
{
|
||||
unsigned int best_pgsz_lg2;
|
||||
unsigned int pgsz_lg2;
|
||||
pt_vaddr_t len = last_va - va + 1;
|
||||
pt_vaddr_t mask;
|
||||
|
||||
if (PT_WARN_ON(va >= last_va))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Given a VA/OA pair the best page size is the largest page size
|
||||
* where:
|
||||
*
|
||||
* 1) VA and OA start at the page. Bitwise this is the count of least
|
||||
* significant 0 bits.
|
||||
* This also implies that last_va/oa has the same prefix as va/oa.
|
||||
*/
|
||||
mask = va | oa;
|
||||
|
||||
/*
|
||||
* 2) The page size is not larger than the last_va (length). Since page
|
||||
* sizes are always power of two this can't be larger than the
|
||||
* largest power of two factor of the length.
|
||||
*/
|
||||
mask |= log2_to_int(vafls(len) - 1);
|
||||
|
||||
best_pgsz_lg2 = vaffs(mask);
|
||||
|
||||
/* Choose the highest bit <= best_pgsz_lg2 */
|
||||
if (best_pgsz_lg2 < PT_VADDR_MAX_LG2 - 1)
|
||||
pgsz_bitmap = log2_mod(pgsz_bitmap, best_pgsz_lg2 + 1);
|
||||
|
||||
pgsz_lg2 = vafls(pgsz_bitmap);
|
||||
if (!pgsz_lg2)
|
||||
return 0;
|
||||
|
||||
pgsz_lg2--;
|
||||
|
||||
PT_WARN_ON(log2_mod(va, pgsz_lg2) != 0);
|
||||
PT_WARN_ON(oalog2_mod(oa, pgsz_lg2) != 0);
|
||||
PT_WARN_ON(va + log2_to_int(pgsz_lg2) - 1 > last_va);
|
||||
PT_WARN_ON(!log2_div_eq(va, va + log2_to_int(pgsz_lg2) - 1, pgsz_lg2));
|
||||
PT_WARN_ON(
|
||||
!oalog2_div_eq(oa, oa + log2_to_int(pgsz_lg2) - 1, pgsz_lg2));
|
||||
return pgsz_lg2;
|
||||
}
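/*
 * Worked example (all values assumed): va == 0x200000, last_va == 0x5fffff,
 * oa == 0x40200000, pgsz_bitmap == SZ_4K | SZ_2M | SZ_1G. va | oa limits the
 * alignment to 2 MiB, the 4 MiB length limits the size to 2^22, and masking
 * the bitmap leaves SZ_4K | SZ_2M, so the result is 21 (one 2 MiB leaf). An
 * unaligned oa such as 0x40201000 would drop the result to 12.
 */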
|
||||
|
||||
#define _PT_MAKE_CALL_LEVEL(fn) \
|
||||
static __always_inline int fn(struct pt_range *range, void *arg, \
|
||||
unsigned int level, \
|
||||
struct pt_table_p *table) \
|
||||
{ \
|
||||
static_assert(PT_MAX_TOP_LEVEL <= 5); \
|
||||
if (level == 0) \
|
||||
return CONCATENATE(fn, 0)(range, arg, 0, table); \
|
||||
if (level == 1 || PT_MAX_TOP_LEVEL == 1) \
|
||||
return CONCATENATE(fn, 1)(range, arg, 1, table); \
|
||||
if (level == 2 || PT_MAX_TOP_LEVEL == 2) \
|
||||
return CONCATENATE(fn, 2)(range, arg, 2, table); \
|
||||
if (level == 3 || PT_MAX_TOP_LEVEL == 3) \
|
||||
return CONCATENATE(fn, 3)(range, arg, 3, table); \
|
||||
if (level == 4 || PT_MAX_TOP_LEVEL == 4) \
|
||||
return CONCATENATE(fn, 4)(range, arg, 4, table); \
|
||||
return CONCATENATE(fn, 5)(range, arg, 5, table); \
|
||||
}
|
||||
|
||||
static inline int __pt_make_level_fn_err(struct pt_range *range, void *arg,
|
||||
unsigned int unused_level,
|
||||
struct pt_table_p *table)
|
||||
{
|
||||
static_assert(PT_MAX_TOP_LEVEL <= 5);
|
||||
return -EPROTOTYPE;
|
||||
}
|
||||
|
||||
#define __PT_MAKE_LEVEL_FN(fn, level, descend_fn, do_fn) \
|
||||
static inline int fn(struct pt_range *range, void *arg, \
|
||||
unsigned int unused_level, \
|
||||
struct pt_table_p *table) \
|
||||
{ \
|
||||
return do_fn(range, arg, level, table, descend_fn); \
|
||||
}
|
||||
|
||||
/**
|
||||
* PT_MAKE_LEVELS() - Build an unwound walker
|
||||
* @fn: Name of the walker function
|
||||
* @do_fn: Function to call at each level
|
||||
*
|
||||
* This builds a function call tree that can be fully inlined.
|
||||
* The caller must provide a function body in an __always_inline function::
|
||||
*
|
||||
* static __always_inline int do_fn(struct pt_range *range, void *arg,
|
||||
* unsigned int level, struct pt_table_p *table,
|
||||
* pt_level_fn_t descend_fn)
|
||||
*
|
||||
* An inline function will be created for each table level that calls do_fn with
|
||||
* a compile time constant for level and a pointer to the next lower function.
|
||||
* This generates an optimally inlined walk where each of the functions sees a
|
||||
* constant level and can codegen the exact constants/etc for that level.
|
||||
*
|
||||
* Note this can produce a lot of code!
|
||||
*/
|
||||
#define PT_MAKE_LEVELS(fn, do_fn) \
|
||||
__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 0), 0, __pt_make_level_fn_err, \
|
||||
do_fn); \
|
||||
__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 1), 1, CONCATENATE(fn, 0), do_fn); \
|
||||
__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 2), 2, CONCATENATE(fn, 1), do_fn); \
|
||||
__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 3), 3, CONCATENATE(fn, 2), do_fn); \
|
||||
__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 4), 4, CONCATENATE(fn, 3), do_fn); \
|
||||
__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 5), 5, CONCATENATE(fn, 4), do_fn); \
|
||||
_PT_MAKE_CALL_LEVEL(fn)
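/*
 * Minimal usage sketch. The walker, struct and argument names below are
 * invented for illustration, and the PT_ENTRY_TABLE handling is an assumption
 * about the pt_state entry types rather than a copy of a real walker:
 *
 *	struct count_args {
 *		u64 leaves;
 *	};
 *
 *	static __always_inline int __count_fn(struct pt_range *range, void *arg,
 *					      unsigned int level,
 *					      struct pt_table_p *table,
 *					      pt_level_fn_t descend_fn)
 *	{
 *		struct pt_state pts = pt_init(range, level, table);
 *		struct count_args *count = arg;
 *		int ret;
 *
 *		for_each_pt_level_entry(&pts) {
 *			if (pts.type == PT_ENTRY_OA) {
 *				count->leaves++;
 *			} else if (pts.type == PT_ENTRY_TABLE) {
 *				ret = pt_descend(&pts, arg, descend_fn);
 *				if (ret)
 *					return ret;
 *			}
 *		}
 *		return 0;
 *	}
 *	PT_MAKE_LEVELS(count_fn, __count_fn);
 *
 * after which a caller builds a range with pt_top_range() or pt_make_range()
 * and starts the walk with pt_walk_range(&range, count_fn, &args).
 */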
|
||||
|
||||
#endif
@@ -0,0 +1,122 @@
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
* Helper macros for working with log2 values
|
||||
*
|
||||
*/
|
||||
#ifndef __GENERIC_PT_LOG2_H
|
||||
#define __GENERIC_PT_LOG2_H
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/limits.h>
|
||||
|
||||
/* Compute a */
|
||||
#define log2_to_int_t(type, a_lg2) ((type)(((type)1) << (a_lg2)))
|
||||
static_assert(log2_to_int_t(unsigned int, 0) == 1);
|
||||
|
||||
/* Compute a - 1 (aka all low bits set) */
|
||||
#define log2_to_max_int_t(type, a_lg2) ((type)(log2_to_int_t(type, a_lg2) - 1))
|
||||
|
||||
/* Compute a / b */
|
||||
#define log2_div_t(type, a, b_lg2) ((type)(((type)a) >> (b_lg2)))
|
||||
static_assert(log2_div_t(unsigned int, 4, 2) == 1);
|
||||
|
||||
/*
|
||||
* Compute:
|
||||
* a / c == b / c
|
||||
* aka the high bits are equal
|
||||
*/
|
||||
#define log2_div_eq_t(type, a, b, c_lg2) \
|
||||
(log2_div_t(type, (a) ^ (b), c_lg2) == 0)
|
||||
static_assert(log2_div_eq_t(unsigned int, 1, 1, 2));
|
||||
|
||||
/* Compute a % b */
|
||||
#define log2_mod_t(type, a, b_lg2) \
|
||||
((type)(((type)a) & log2_to_max_int_t(type, b_lg2)))
|
||||
static_assert(log2_mod_t(unsigned int, 1, 2) == 1);
|
||||
|
||||
/*
|
||||
* Compute:
|
||||
* a % b == b - 1
|
||||
* aka the low bits are all 1s
|
||||
*/
|
||||
#define log2_mod_eq_max_t(type, a, b_lg2) \
|
||||
(log2_mod_t(type, a, b_lg2) == log2_to_max_int_t(type, b_lg2))
|
||||
static_assert(log2_mod_eq_max_t(unsigned int, 3, 2));
|
||||
|
||||
/*
|
||||
* Return a value such that:
|
||||
* a / b == ret / b
|
||||
* ret % b == val
|
||||
* aka set the low bits to val. val must be < b
|
||||
*/
|
||||
#define log2_set_mod_t(type, a, val, b_lg2) \
|
||||
((((type)(a)) & (~log2_to_max_int_t(type, b_lg2))) | ((type)(val)))
|
||||
static_assert(log2_set_mod_t(unsigned int, 3, 1, 2) == 1);
|
||||
|
||||
/* Return a value such that:
|
||||
* a / b == ret / b
|
||||
* ret % b == b - 1
|
||||
* aka set the low bits to all 1s
|
||||
*/
|
||||
#define log2_set_mod_max_t(type, a, b_lg2) \
|
||||
(((type)(a)) | log2_to_max_int_t(type, b_lg2))
|
||||
static_assert(log2_set_mod_max_t(unsigned int, 2, 2) == 3);
|
||||
|
||||
/* Compute a * b */
|
||||
#define log2_mul_t(type, a, b_lg2) ((type)(((type)a) << (b_lg2)))
|
||||
static_assert(log2_mul_t(unsigned int, 2, 2) == 8);
|
||||
|
||||
#define _dispatch_sz(type, fn, a) \
|
||||
(sizeof(type) == 4 ? fn##32((u32)a) : fn##64(a))
|
||||
|
||||
/*
|
||||
* Return the highest value such that:
|
||||
* fls_t(u32, 0) == 0
|
||||
 * fls_t(u32, 1) == 1
|
||||
* a >= log2_to_int(ret - 1)
|
||||
* aka find last set bit
|
||||
*/
|
||||
static inline unsigned int fls32(u32 a)
|
||||
{
|
||||
return fls(a);
|
||||
}
|
||||
#define fls_t(type, a) _dispatch_sz(type, fls, a)
|
||||
|
||||
/*
|
||||
* Return the highest value such that:
|
||||
* ffs_t(u32, 0) == UNDEFINED
|
||||
* ffs_t(u32, 1) == 0
|
||||
* log_mod(a, ret) == 0
|
||||
* aka find first set bit
|
||||
*/
|
||||
static inline unsigned int __ffs32(u32 a)
|
||||
{
|
||||
return __ffs(a);
|
||||
}
|
||||
#define ffs_t(type, a) _dispatch_sz(type, __ffs, a)
|
||||
|
||||
/*
|
||||
* Return the highest value such that:
|
||||
* ffz_t(u32, U32_MAX) == UNDEFINED
|
||||
* ffz_t(u32, 0) == 0
|
||||
* ffz_t(u32, 1) == 1
|
||||
* log_mod(a, ret) == log_to_max_int(ret)
|
||||
* aka find first zero bit
|
||||
*/
|
||||
static inline unsigned int ffz32(u32 a)
|
||||
{
|
||||
return ffz(a);
|
||||
}
|
||||
static inline unsigned int ffz64(u64 a)
|
||||
{
|
||||
if (sizeof(u64) == sizeof(unsigned long))
|
||||
return ffz(a);
|
||||
|
||||
if ((u32)a == U32_MAX)
|
||||
return ffz32(a >> 32) + 32;
|
||||
return ffz32(a);
|
||||
}
|
||||
#define ffz_t(type, a) _dispatch_sz(type, ffz, a)
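/*
 * Example of how the helpers compose (values assumed for illustration):
 * splitting a VA into a level-1 table index and a page offset for a 4 KiB
 * granule with 9 index bits per level:
 *
 *	u64 va = 0x40201234;
 *	unsigned int idx = log2_mod_t(u64, log2_div_t(u64, va, 21), 9);
 *	u64 off = log2_mod_t(u64, va, 12);
 *
 * gives idx == 1 and off == 0x234, the same log2_mod(log2_div(...)) pattern
 * pt_range_to_index() uses through the typed wrappers.
 */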
|
||||
|
||||
#endif
|
||||
|
|
@@ -13,6 +13,10 @@ config INTEL_IOMMU
|
|||
bool "Support for Intel IOMMU using DMA Remapping Devices"
|
||||
depends on PCI_MSI && ACPI && X86
|
||||
select IOMMU_API
|
||||
select GENERIC_PT
|
||||
select IOMMU_PT
|
||||
select IOMMU_PT_X86_64
|
||||
select IOMMU_PT_VTDSS
|
||||
select IOMMU_IOVA
|
||||
select IOMMU_IOPF
|
||||
select IOMMUFD_DRIVER if IOMMUFD
|
||||
|
|
@@ -66,7 +70,7 @@ config INTEL_IOMMU_DEFAULT_ON
|
|||
|
||||
config INTEL_IOMMU_FLOPPY_WA
|
||||
def_bool y
|
||||
depends on X86
|
||||
depends on X86 && BLK_DEV_FD
|
||||
help
|
||||
Floppy disk drivers are known to bypass DMA API calls
|
||||
thereby failing to work when IOMMU is enabled. This
|
||||
|
|
|
|||
File diff suppressed because it is too large
|
|
@@ -23,8 +23,8 @@
|
|||
#include <linux/xarray.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/generic_pt/iommu.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/iommu.h>
|
||||
#include <uapi/linux/iommufd.h>
|
||||
|
||||
|
|
@@ -595,22 +595,20 @@ struct qi_batch {
|
|||
};
|
||||
|
||||
struct dmar_domain {
|
||||
int nid; /* node id */
|
||||
union {
|
||||
struct iommu_domain domain;
|
||||
struct pt_iommu iommu;
|
||||
/* First stage page table */
|
||||
struct pt_iommu_x86_64 fspt;
|
||||
/* Second stage page table */
|
||||
struct pt_iommu_vtdss sspt;
|
||||
};
|
||||
|
||||
struct xarray iommu_array; /* Attached IOMMU array */
|
||||
|
||||
u8 iommu_coherency: 1; /* indicate coherency of iommu access */
|
||||
u8 force_snooping : 1; /* Create IOPTEs with snoop control */
|
||||
u8 set_pte_snp:1;
|
||||
u8 use_first_level:1; /* DMA translation for the domain goes
|
||||
* through the first level page table,
|
||||
* otherwise, goes through the second
|
||||
* level.
|
||||
*/
|
||||
u8 force_snooping:1; /* Create PASID entry with snoop control */
|
||||
u8 dirty_tracking:1; /* Dirty tracking is enabled */
|
||||
u8 nested_parent:1; /* Has other domains nested on it */
|
||||
u8 has_mappings:1; /* Has mappings configured through
|
||||
* iommu_map() interface.
|
||||
*/
|
||||
u8 iotlb_sync_map:1; /* Need to flush IOTLB cache or write
|
||||
* buffer when creating mappings.
|
||||
*/
|
||||
|
|
@@ -623,26 +621,9 @@ struct dmar_domain {
|
|||
struct list_head cache_tags; /* Cache tag list */
|
||||
struct qi_batch *qi_batch; /* Batched QI descriptors */
|
||||
|
||||
int iommu_superpage;/* Level of superpages supported:
|
||||
0 == 4KiB (no superpages), 1 == 2MiB,
|
||||
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
|
||||
union {
|
||||
/* DMA remapping domain */
|
||||
struct {
|
||||
/* virtual address */
|
||||
struct dma_pte *pgd;
|
||||
/* max guest address width */
|
||||
int gaw;
|
||||
/*
|
||||
* adjusted guest address width:
|
||||
* 0: level 2 30-bit
|
||||
* 1: level 3 39-bit
|
||||
* 2: level 4 48-bit
|
||||
* 3: level 5 57-bit
|
||||
*/
|
||||
int agaw;
|
||||
/* maximum mapped address */
|
||||
u64 max_addr;
|
||||
/* Protect the s1_domains list */
|
||||
spinlock_t s1_lock;
|
||||
/* Track s1_domains nested on this domain */
|
||||
|
|
@@ -664,10 +645,10 @@ struct dmar_domain {
|
|||
struct mmu_notifier notifier;
|
||||
};
|
||||
};
|
||||
|
||||
struct iommu_domain domain; /* generic domain data structure for
|
||||
iommu core */
|
||||
};
|
||||
PT_IOMMU_CHECK_DOMAIN(struct dmar_domain, iommu, domain);
|
||||
PT_IOMMU_CHECK_DOMAIN(struct dmar_domain, sspt.iommu, domain);
|
||||
PT_IOMMU_CHECK_DOMAIN(struct dmar_domain, fspt.iommu, domain);
|
||||
|
||||
/*
|
||||
* In theory, the VT-d 4.0 spec can support up to 2 ^ 16 counters.
|
||||
|
|
@@ -866,11 +847,6 @@ struct dma_pte {
|
|||
u64 val;
|
||||
};
|
||||
|
||||
static inline void dma_clear_pte(struct dma_pte *pte)
|
||||
{
|
||||
pte->val = 0;
|
||||
}
|
||||
|
||||
static inline u64 dma_pte_addr(struct dma_pte *pte)
|
||||
{
|
||||
#ifdef CONFIG_64BIT
|
||||
|
|
@@ -886,32 +862,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
|
|||
return (pte->val & 3) != 0;
|
||||
}
|
||||
|
||||
static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte,
|
||||
unsigned long flags)
|
||||
{
|
||||
if (flags & IOMMU_DIRTY_NO_CLEAR)
|
||||
return (pte->val & DMA_SL_PTE_DIRTY) != 0;
|
||||
|
||||
return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT,
|
||||
(unsigned long *)&pte->val);
|
||||
}
|
||||
|
||||
static inline bool dma_pte_superpage(struct dma_pte *pte)
|
||||
{
|
||||
return (pte->val & DMA_PTE_LARGE_PAGE);
|
||||
}
|
||||
|
||||
static inline bool first_pte_in_page(struct dma_pte *pte)
|
||||
{
|
||||
return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
|
||||
}
|
||||
|
||||
static inline int nr_pte_to_next_page(struct dma_pte *pte)
|
||||
{
|
||||
return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) :
|
||||
(struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
|
||||
}
|
||||
|
||||
static inline bool context_present(struct context_entry *context)
|
||||
{
|
||||
return (context->lo & 1);
|
||||
|
|
@@ -927,11 +882,6 @@ static inline int agaw_to_level(int agaw)
|
|||
return agaw + 2;
|
||||
}
|
||||
|
||||
static inline int agaw_to_width(int agaw)
|
||||
{
|
||||
return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
|
||||
}
|
||||
|
||||
static inline int width_to_agaw(int width)
|
||||
{
|
||||
return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
|
||||
|
|
@@ -947,25 +897,6 @@ static inline int pfn_level_offset(u64 pfn, int level)
|
|||
return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
|
||||
}
|
||||
|
||||
static inline u64 level_mask(int level)
|
||||
{
|
||||
return -1ULL << level_to_offset_bits(level);
|
||||
}
|
||||
|
||||
static inline u64 level_size(int level)
|
||||
{
|
||||
return 1ULL << level_to_offset_bits(level);
|
||||
}
|
||||
|
||||
static inline u64 align_to_level(u64 pfn, int level)
|
||||
{
|
||||
return (pfn + level_size(level) - 1) & level_mask(level);
|
||||
}
|
||||
|
||||
static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
|
||||
{
|
||||
return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
|
||||
}
|
||||
|
||||
static inline void context_set_present(struct context_entry *context)
|
||||
{
|
||||
|
|
@@ -1097,7 +1028,7 @@ static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
|
|||
struct qi_desc *desc)
|
||||
{
|
||||
u8 dw = 0, dr = 0;
|
||||
int ih = 0;
|
||||
int ih = addr & 1;
|
||||
|
||||
if (cap_write_drain(iommu->cap))
|
||||
dw = 1;
|
||||
|
|
|
|||
|
|
@@ -19,7 +19,7 @@
|
|||
#include "pasid.h"
|
||||
|
||||
static int intel_nested_attach_dev(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev, struct iommu_domain *old)
|
||||
{
|
||||
struct device_domain_info *info = dev_iommu_priv_get(dev);
|
||||
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
|
||||
|
|
@@ -29,11 +29,6 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
|
|||
|
||||
device_block_translation(dev);
|
||||
|
||||
if (iommu->agaw < dmar_domain->s2_domain->agaw) {
|
||||
dev_err_ratelimited(dev, "Adjusted guest address width not compatible\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/*
|
||||
* Stage-1 domain cannot work alone, it is nested on a s2_domain.
|
||||
* The s2_domain will be used in nested translation, hence needs
|
||||
|
|
|
|||
|
|
@@ -366,7 +366,7 @@ static void pasid_pte_config_first_level(struct intel_iommu *iommu,
|
|||
|
||||
pasid_set_domain_id(pte, did);
|
||||
pasid_set_address_width(pte, iommu->agaw);
|
||||
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
|
||||
pasid_set_page_snoop(pte, flags & PASID_FLAG_PWSNP);
|
||||
|
||||
/* Setup Present and PASID Granular Transfer Type: */
|
||||
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
|
||||
|
|
@@ -461,19 +461,22 @@ int intel_pasid_replace_first_level(struct intel_iommu *iommu,
|
|||
*/
|
||||
static void pasid_pte_config_second_level(struct intel_iommu *iommu,
|
||||
struct pasid_entry *pte,
|
||||
u64 pgd_val, int agaw, u16 did,
|
||||
bool dirty_tracking)
|
||||
struct dmar_domain *domain, u16 did)
|
||||
{
|
||||
struct pt_iommu_vtdss_hw_info pt_info;
|
||||
|
||||
lockdep_assert_held(&iommu->lock);
|
||||
|
||||
pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info);
|
||||
pasid_clear_entry(pte);
|
||||
pasid_set_domain_id(pte, did);
|
||||
pasid_set_slptr(pte, pgd_val);
|
||||
pasid_set_address_width(pte, agaw);
|
||||
pasid_set_slptr(pte, pt_info.ssptptr);
|
||||
pasid_set_address_width(pte, pt_info.aw);
|
||||
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
|
||||
pasid_set_fault_enable(pte);
|
||||
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
|
||||
if (dirty_tracking)
|
||||
pasid_set_page_snoop(pte, !(domain->sspt.vtdss_pt.common.features &
|
||||
BIT(PT_FEAT_DMA_INCOHERENT)));
|
||||
if (domain->dirty_tracking)
|
||||
pasid_set_ssade(pte);
|
||||
|
||||
pasid_set_present(pte);
|
||||
|
|
@@ -484,10 +487,9 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
|
|||
struct device *dev, u32 pasid)
|
||||
{
|
||||
struct pasid_entry *pte;
|
||||
struct dma_pte *pgd;
|
||||
u64 pgd_val;
|
||||
u16 did;
|
||||
|
||||
|
||||
/*
|
||||
* If hardware advertises no support for second level
|
||||
* translation, return directly.
|
||||
|
|
@@ -498,8 +500,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
pgd = domain->pgd;
|
||||
pgd_val = virt_to_phys(pgd);
|
||||
did = domain_id_iommu(domain, iommu);
|
||||
|
||||
spin_lock(&iommu->lock);
|
||||
|
|
@@ -514,8 +514,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
|
|||
return -EBUSY;
|
||||
}
|
||||
|
||||
pasid_pte_config_second_level(iommu, pte, pgd_val, domain->agaw,
|
||||
did, domain->dirty_tracking);
|
||||
pasid_pte_config_second_level(iommu, pte, domain, did);
|
||||
spin_unlock(&iommu->lock);
|
||||
|
||||
pasid_flush_caches(iommu, pte, pasid, did);
|
||||
|
|
@@ -529,8 +528,6 @@ int intel_pasid_replace_second_level(struct intel_iommu *iommu,
|
|||
u32 pasid)
|
||||
{
|
||||
struct pasid_entry *pte, new_pte;
|
||||
struct dma_pte *pgd;
|
||||
u64 pgd_val;
|
||||
u16 did;
|
||||
|
||||
/*
|
||||
|
|
@@ -543,13 +540,9 @@ int intel_pasid_replace_second_level(struct intel_iommu *iommu,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
pgd = domain->pgd;
|
||||
pgd_val = virt_to_phys(pgd);
|
||||
did = domain_id_iommu(domain, iommu);
|
||||
|
||||
pasid_pte_config_second_level(iommu, &new_pte, pgd_val,
|
||||
domain->agaw, did,
|
||||
domain->dirty_tracking);
|
||||
pasid_pte_config_second_level(iommu, &new_pte, domain, did);
|
||||
|
||||
spin_lock(&iommu->lock);
|
||||
pte = intel_pasid_get_entry(dev, pasid);
|
||||
|
|
@@ -747,10 +740,12 @@ static void pasid_pte_config_nestd(struct intel_iommu *iommu,
|
|||
struct dmar_domain *s2_domain,
|
||||
u16 did)
|
||||
{
|
||||
struct dma_pte *pgd = s2_domain->pgd;
|
||||
struct pt_iommu_vtdss_hw_info pt_info;
|
||||
|
||||
lockdep_assert_held(&iommu->lock);
|
||||
|
||||
pt_iommu_vtdss_hw_info(&s2_domain->sspt, &pt_info);
|
||||
|
||||
pasid_clear_entry(pte);
|
||||
|
||||
if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
|
||||
|
|
@@ -770,11 +765,12 @@ static void pasid_pte_config_nestd(struct intel_iommu *iommu,
|
|||
if (s2_domain->force_snooping)
|
||||
pasid_set_pgsnp(pte);
|
||||
|
||||
pasid_set_slptr(pte, virt_to_phys(pgd));
|
||||
pasid_set_slptr(pte, pt_info.ssptptr);
|
||||
pasid_set_fault_enable(pte);
|
||||
pasid_set_domain_id(pte, did);
|
||||
pasid_set_address_width(pte, s2_domain->agaw);
|
||||
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
|
||||
pasid_set_address_width(pte, pt_info.aw);
|
||||
pasid_set_page_snoop(pte, !(s2_domain->sspt.vtdss_pt.common.features &
|
||||
BIT(PT_FEAT_DMA_INCOHERENT)));
|
||||
if (s2_domain->dirty_tracking)
|
||||
pasid_set_ssade(pte);
|
||||
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
|
||||
|
|
|
|||
|
|
@@ -24,6 +24,7 @@
|
|||
|
||||
#define PASID_FLAG_NESTED BIT(1)
|
||||
#define PASID_FLAG_PAGE_SNOOP BIT(2)
|
||||
#define PASID_FLAG_PWSNP BIT(2)
|
||||
|
||||
/*
|
||||
* The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first-
|
||||
|
|
|
|||
|
|
@@ -170,6 +170,7 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
|
|||
|
||||
/* Setup the pasid table: */
|
||||
sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
|
||||
sflags |= PASID_FLAG_PWSNP;
|
||||
ret = __domain_setup_first_level(iommu, dev, pasid,
|
||||
FLPT_DEFAULT_DID, __pa(mm->pgd),
|
||||
sflags, old);
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,214 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* CPU-agnostic ARM page table allocator.
|
||||
*
|
||||
* Copyright (C) 2014 ARM Limited
|
||||
*
|
||||
* Author: Will Deacon <will.deacon@arm.com>
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt
|
||||
|
||||
#include <kunit/device.h>
|
||||
#include <kunit/test.h>
|
||||
#include <linux/io-pgtable.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include "io-pgtable-arm.h"
|
||||
|
||||
static struct io_pgtable_cfg *cfg_cookie;
|
||||
|
||||
static void dummy_tlb_flush_all(void *cookie)
|
||||
{
|
||||
WARN_ON(cookie != cfg_cookie);
|
||||
}
|
||||
|
||||
static void dummy_tlb_flush(unsigned long iova, size_t size,
|
||||
size_t granule, void *cookie)
|
||||
{
|
||||
WARN_ON(cookie != cfg_cookie);
|
||||
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
|
||||
}
|
||||
|
||||
static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
|
||||
unsigned long iova, size_t granule,
|
||||
void *cookie)
|
||||
{
|
||||
dummy_tlb_flush(iova, granule, granule, cookie);
|
||||
}
|
||||
|
||||
static const struct iommu_flush_ops dummy_tlb_ops = {
|
||||
.tlb_flush_all = dummy_tlb_flush_all,
|
||||
.tlb_flush_walk = dummy_tlb_flush,
|
||||
.tlb_add_page = dummy_tlb_add_page,
|
||||
};
|
||||
|
||||
#define __FAIL(test, i) ({ \
|
||||
KUNIT_FAIL(test, "test failed for fmt idx %d\n", (i)); \
|
||||
-EFAULT; \
|
||||
})
|
||||
|
||||
static int arm_lpae_run_tests(struct kunit *test, struct io_pgtable_cfg *cfg)
|
||||
{
|
||||
static const enum io_pgtable_fmt fmts[] = {
|
||||
ARM_64_LPAE_S1,
|
||||
ARM_64_LPAE_S2,
|
||||
};
|
||||
|
||||
int i, j;
|
||||
unsigned long iova;
|
||||
size_t size, mapped;
|
||||
struct io_pgtable_ops *ops;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(fmts); ++i) {
|
||||
cfg_cookie = cfg;
|
||||
ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg);
|
||||
if (!ops) {
|
||||
kunit_err(test, "failed to allocate io pgtable ops\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initial sanity checks.
|
||||
* Empty page tables shouldn't provide any translations.
|
||||
*/
|
||||
if (ops->iova_to_phys(ops, 42))
|
||||
return __FAIL(test, i);
|
||||
|
||||
if (ops->iova_to_phys(ops, SZ_1G + 42))
|
||||
return __FAIL(test, i);
|
||||
|
||||
if (ops->iova_to_phys(ops, SZ_2G + 42))
|
||||
return __FAIL(test, i);
|
||||
|
||||
/*
|
||||
* Distinct mappings of different granule sizes.
|
||||
*/
|
||||
iova = 0;
|
||||
for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
|
||||
size = 1UL << j;
|
||||
|
||||
if (ops->map_pages(ops, iova, iova, size, 1,
|
||||
IOMMU_READ | IOMMU_WRITE |
|
||||
IOMMU_NOEXEC | IOMMU_CACHE,
|
||||
GFP_KERNEL, &mapped))
|
||||
return __FAIL(test, i);
|
||||
|
||||
/* Overlapping mappings */
|
||||
if (!ops->map_pages(ops, iova, iova + size, size, 1,
|
||||
IOMMU_READ | IOMMU_NOEXEC,
|
||||
GFP_KERNEL, &mapped))
|
||||
return __FAIL(test, i);
|
||||
|
||||
if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
|
||||
return __FAIL(test, i);
|
||||
|
||||
iova += SZ_1G;
|
||||
}
|
||||
|
||||
/* Full unmap */
|
||||
iova = 0;
|
||||
for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
|
||||
size = 1UL << j;
|
||||
|
||||
if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
|
||||
return __FAIL(test, i);
|
||||
|
||||
if (ops->iova_to_phys(ops, iova + 42))
|
||||
return __FAIL(test, i);
|
||||
|
||||
/* Remap full block */
|
||||
if (ops->map_pages(ops, iova, iova, size, 1,
|
||||
IOMMU_WRITE, GFP_KERNEL, &mapped))
|
||||
return __FAIL(test, i);
|
||||
|
||||
if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
|
||||
return __FAIL(test, i);
|
||||
|
||||
iova += SZ_1G;
|
||||
}
|
||||
|
||||
/*
|
||||
* Map/unmap the last largest supported page of the IAS, this can
|
||||
 * trigger corner cases in the concatenated page tables.
|
||||
*/
|
||||
mapped = 0;
|
||||
size = 1UL << __fls(cfg->pgsize_bitmap);
|
||||
iova = (1UL << cfg->ias) - size;
|
||||
if (ops->map_pages(ops, iova, iova, size, 1,
|
||||
IOMMU_READ | IOMMU_WRITE |
|
||||
IOMMU_NOEXEC | IOMMU_CACHE,
|
||||
GFP_KERNEL, &mapped))
|
||||
return __FAIL(test, i);
|
||||
if (mapped != size)
|
||||
return __FAIL(test, i);
|
||||
if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
|
||||
return __FAIL(test, i);
|
||||
|
||||
free_io_pgtable_ops(ops);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void arm_lpae_do_selftests(struct kunit *test)
|
||||
{
|
||||
static const unsigned long pgsize[] = {
|
||||
SZ_4K | SZ_2M | SZ_1G,
|
||||
SZ_16K | SZ_32M,
|
||||
SZ_64K | SZ_512M,
|
||||
};
|
||||
|
||||
static const unsigned int address_size[] = {
|
||||
32, 36, 40, 42, 44, 48,
|
||||
};
|
||||
|
||||
int i, j, k, pass = 0, fail = 0;
|
||||
struct device *dev;
|
||||
struct io_pgtable_cfg cfg = {
|
||||
.tlb = &dummy_tlb_ops,
|
||||
.coherent_walk = true,
|
||||
.quirks = IO_PGTABLE_QUIRK_NO_WARN,
|
||||
};
|
||||
|
||||
dev = kunit_device_register(test, "io-pgtable-test");
|
||||
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, dev);
|
||||
if (IS_ERR_OR_NULL(dev))
|
||||
return;
|
||||
|
||||
cfg.iommu_dev = dev;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
|
||||
for (j = 0; j < ARRAY_SIZE(address_size); ++j) {
|
||||
/* Don't use ias > oas as it is not valid for stage-2. */
|
||||
for (k = 0; k <= j; ++k) {
|
||||
cfg.pgsize_bitmap = pgsize[i];
|
||||
cfg.ias = address_size[k];
|
||||
cfg.oas = address_size[j];
|
||||
kunit_info(test, "pgsize_bitmap 0x%08lx, IAS %u OAS %u\n",
|
||||
pgsize[i], cfg.ias, cfg.oas);
|
||||
if (arm_lpae_run_tests(test, &cfg))
|
||||
fail++;
|
||||
else
|
||||
pass++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kunit_info(test, "completed with %d PASS %d FAIL\n", pass, fail);
|
||||
}
|
||||
|
||||
static struct kunit_case io_pgtable_arm_test_cases[] = {
|
||||
KUNIT_CASE(arm_lpae_do_selftests),
|
||||
{},
|
||||
};
|
||||
|
||||
static struct kunit_suite io_pgtable_arm_test = {
|
||||
.name = "io-pgtable-arm-test",
|
||||
.test_cases = io_pgtable_arm_test_cases,
|
||||
};
|
||||
|
||||
kunit_test_suite(io_pgtable_arm_test);
|
||||
|
||||
MODULE_DESCRIPTION("io-pgtable-arm library kunit tests");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
|
@@ -12,8 +12,6 @@
|
|||
#include <linux/atomic.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/io-pgtable.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/device/faux.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/types.h>
|
||||
|
|
@@ -1267,204 +1265,3 @@ struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
|
|||
.alloc = arm_mali_lpae_alloc_pgtable,
|
||||
.free = arm_lpae_free_pgtable,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
|
||||
|
||||
static struct io_pgtable_cfg *cfg_cookie __initdata;
|
||||
|
||||
static void __init dummy_tlb_flush_all(void *cookie)
|
||||
{
|
||||
WARN_ON(cookie != cfg_cookie);
|
||||
}
|
||||
|
||||
static void __init dummy_tlb_flush(unsigned long iova, size_t size,
|
||||
size_t granule, void *cookie)
|
||||
{
|
||||
WARN_ON(cookie != cfg_cookie);
|
||||
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
|
||||
}
|
||||
|
||||
static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
|
||||
unsigned long iova, size_t granule,
|
||||
void *cookie)
|
||||
{
|
||||
dummy_tlb_flush(iova, granule, granule, cookie);
|
||||
}
|
||||
|
||||
static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
|
||||
.tlb_flush_all = dummy_tlb_flush_all,
|
||||
.tlb_flush_walk = dummy_tlb_flush,
|
||||
.tlb_add_page = dummy_tlb_add_page,
|
||||
};
|
||||
|
||||
static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
|
||||
{
|
||||
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
|
||||
struct io_pgtable_cfg *cfg = &data->iop.cfg;
|
||||
|
||||
pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
|
||||
cfg->pgsize_bitmap, cfg->ias);
|
||||
pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
|
||||
ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data),
|
||||
ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd);
|
||||
}
|
||||
|
||||
#define __FAIL(ops, i) ({ \
|
||||
WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \
|
||||
arm_lpae_dump_ops(ops); \
|
||||
-EFAULT; \
|
||||
})
|
||||
|
||||
static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
|
||||
{
|
||||
static const enum io_pgtable_fmt fmts[] __initconst = {
|
||||
ARM_64_LPAE_S1,
|
||||
ARM_64_LPAE_S2,
|
||||
};
|
||||
|
||||
int i, j;
|
||||
unsigned long iova;
|
||||
size_t size, mapped;
|
||||
struct io_pgtable_ops *ops;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(fmts); ++i) {
|
||||
cfg_cookie = cfg;
|
||||
ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg);
|
||||
if (!ops) {
|
||||
pr_err("selftest: failed to allocate io pgtable ops\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initial sanity checks.
|
||||
* Empty page tables shouldn't provide any translations.
|
||||
*/
|
||||
if (ops->iova_to_phys(ops, 42))
|
||||
return __FAIL(ops, i);
|
||||
|
||||
if (ops->iova_to_phys(ops, SZ_1G + 42))
|
||||
return __FAIL(ops, i);
|
||||
|
||||
if (ops->iova_to_phys(ops, SZ_2G + 42))
|
||||
return __FAIL(ops, i);
|
||||
|
||||
/*
|
||||
* Distinct mappings of different granule sizes.
|
||||
*/
|
||||
iova = 0;
|
||||
for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
|
||||
size = 1UL << j;
|
||||
|
||||
if (ops->map_pages(ops, iova, iova, size, 1,
|
||||
IOMMU_READ | IOMMU_WRITE |
|
||||
IOMMU_NOEXEC | IOMMU_CACHE,
|
||||
GFP_KERNEL, &mapped))
|
||||
return __FAIL(ops, i);
|
||||
|
||||
/* Overlapping mappings */
|
||||
if (!ops->map_pages(ops, iova, iova + size, size, 1,
|
||||
IOMMU_READ | IOMMU_NOEXEC,
|
||||
GFP_KERNEL, &mapped))
|
||||
return __FAIL(ops, i);
|
||||
|
||||
if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
|
||||
return __FAIL(ops, i);
|
||||
|
||||
iova += SZ_1G;
|
||||
}
|
||||
|
||||
/* Full unmap */
|
||||
iova = 0;
|
||||
for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
|
||||
size = 1UL << j;
|
||||
|
||||
if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
|
||||
return __FAIL(ops, i);
|
||||
|
||||
if (ops->iova_to_phys(ops, iova + 42))
|
||||
return __FAIL(ops, i);
|
||||
|
||||
/* Remap full block */
|
||||
if (ops->map_pages(ops, iova, iova, size, 1,
|
||||
IOMMU_WRITE, GFP_KERNEL, &mapped))
|
||||
return __FAIL(ops, i);
|
||||
|
||||
if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
|
||||
return __FAIL(ops, i);
|
||||
|
||||
iova += SZ_1G;
|
||||
}
|
||||
|
||||
/*
|
||||
* Map/unmap the last largest supported page of the IAS, this can
|
||||
 * trigger corner cases in the concatenated page tables.
|
||||
*/
|
||||
mapped = 0;
|
||||
size = 1UL << __fls(cfg->pgsize_bitmap);
|
||||
iova = (1UL << cfg->ias) - size;
|
||||
if (ops->map_pages(ops, iova, iova, size, 1,
|
||||
IOMMU_READ | IOMMU_WRITE |
|
||||
IOMMU_NOEXEC | IOMMU_CACHE,
|
||||
GFP_KERNEL, &mapped))
|
||||
return __FAIL(ops, i);
|
||||
if (mapped != size)
|
||||
return __FAIL(ops, i);
|
||||
if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
|
||||
return __FAIL(ops, i);
|
||||
|
||||
free_io_pgtable_ops(ops);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init arm_lpae_do_selftests(void)
|
||||
{
|
||||
static const unsigned long pgsize[] __initconst = {
|
||||
SZ_4K | SZ_2M | SZ_1G,
|
||||
SZ_16K | SZ_32M,
|
||||
SZ_64K | SZ_512M,
|
||||
};
|
||||
|
||||
static const unsigned int address_size[] __initconst = {
|
||||
32, 36, 40, 42, 44, 48,
|
||||
};
|
||||
|
||||
int i, j, k, pass = 0, fail = 0;
|
||||
struct faux_device *dev;
|
||||
struct io_pgtable_cfg cfg = {
|
||||
.tlb = &dummy_tlb_ops,
|
||||
.coherent_walk = true,
|
||||
.quirks = IO_PGTABLE_QUIRK_NO_WARN,
|
||||
};
|
||||
|
||||
dev = faux_device_create("io-pgtable-test", NULL, 0);
|
||||
if (!dev)
|
||||
return -ENOMEM;
|
||||
|
||||
cfg.iommu_dev = &dev->dev;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
|
||||
for (j = 0; j < ARRAY_SIZE(address_size); ++j) {
|
||||
/* Don't use ias > oas as it is not valid for stage-2. */
|
||||
for (k = 0; k <= j; ++k) {
|
||||
cfg.pgsize_bitmap = pgsize[i];
|
||||
cfg.ias = address_size[k];
|
||||
cfg.oas = address_size[j];
|
||||
pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u OAS %u\n",
|
||||
pgsize[i], cfg.ias, cfg.oas);
|
||||
if (arm_lpae_run_tests(&cfg))
|
||||
fail++;
|
||||
else
|
||||
pass++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
|
||||
faux_device_destroy(dev);
|
||||
|
||||
return fail ? -EFAULT : 0;
|
||||
}
|
||||
subsys_initcall(arm_lpae_do_selftests);
|
||||
#endif
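(Aside, not part of the patch: with the six entries in address_size[] above and the k <= j restriction in arm_lpae_do_selftests(), each page-size set is run for 6 * 7 / 2 = 21 (IAS, OAS) pairs, so the sweep covers 3 * 21 = 63 configurations in total.)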
@ -28,10 +28,6 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
|
|||
#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
|
||||
[ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
|
||||
#endif
|
||||
#ifdef CONFIG_AMD_IOMMU
|
||||
[AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns,
|
||||
[AMD_IOMMU_V2] = &io_pgtable_amd_iommu_v2_init_fns,
|
||||
#endif
|
||||
};
|
||||
|
||||
static int check_custom_allocator(enum io_pgtable_fmt fmt,
@ -4,6 +4,7 @@
|
|||
* Pasha Tatashin <pasha.tatashin@soleen.com>
|
||||
*/
|
||||
#include "iommu-pages.h"
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
|
|
@ -22,6 +23,11 @@ IOPTDESC_MATCH(memcg_data, memcg_data);
|
|||
#undef IOPTDESC_MATCH
|
||||
static_assert(sizeof(struct ioptdesc) <= sizeof(struct page));
|
||||
|
||||
static inline size_t ioptdesc_mem_size(struct ioptdesc *desc)
|
||||
{
|
||||
return 1UL << (folio_order(ioptdesc_folio(desc)) + PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/**
|
||||
* iommu_alloc_pages_node_sz - Allocate a zeroed page of a given size from
|
||||
* specific NUMA node
|
||||
|
|
@ -36,6 +42,7 @@ static_assert(sizeof(struct ioptdesc) <= sizeof(struct page));
|
|||
*/
|
||||
void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size)
|
||||
{
|
||||
struct ioptdesc *iopt;
|
||||
unsigned long pgcnt;
|
||||
struct folio *folio;
|
||||
unsigned int order;
|
||||
|
|
@ -60,6 +67,9 @@ void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size)
|
|||
if (unlikely(!folio))
|
||||
return NULL;
|
||||
|
||||
iopt = folio_ioptdesc(folio);
|
||||
iopt->incoherent = false;
|
||||
|
||||
/*
|
||||
* All page allocations that should be reported as "iommu-pagetables"
|
||||
* to userspace must use one of the functions below. This includes
|
||||
|
|
@ -80,7 +90,10 @@ EXPORT_SYMBOL_GPL(iommu_alloc_pages_node_sz);
|
|||
static void __iommu_free_desc(struct ioptdesc *iopt)
|
||||
{
|
||||
struct folio *folio = ioptdesc_folio(iopt);
|
||||
const unsigned long pgcnt = 1UL << folio_order(folio);
|
||||
const unsigned long pgcnt = folio_nr_pages(folio);
|
||||
|
||||
if (IOMMU_PAGES_USE_DMA_API)
|
||||
WARN_ON_ONCE(iopt->incoherent);
|
||||
|
||||
mod_node_page_state(folio_pgdat(folio), NR_IOMMU_PAGES, -pgcnt);
|
||||
lruvec_stat_mod_folio(folio, NR_SECONDARY_PAGETABLE, -pgcnt);
|
||||
|
|
@ -117,3 +130,124 @@ void iommu_put_pages_list(struct iommu_pages_list *list)
|
|||
__iommu_free_desc(iopt);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_put_pages_list);
|
||||
|
||||
/**
|
||||
* iommu_pages_start_incoherent - Setup the page for cache incoherent operation
|
||||
* @virt: The page to setup
|
||||
* @dma_dev: The iommu device
|
||||
*
|
||||
* For incoherent memory this will use the DMA API to manage the cache flushing
|
||||
* on some arches. This is a lot of complexity compared to just calling
|
||||
* arch_sync_dma_for_device(), but it is what the existing ARM iommu drivers
|
||||
* have been doing. The DMA API requires keeping track of the DMA map and
|
||||
* freeing it when required. This keeps track of the dma map inside the ioptdesc
|
||||
* so that error paths are simple for the caller.
|
||||
*/
|
||||
int iommu_pages_start_incoherent(void *virt, struct device *dma_dev)
|
||||
{
|
||||
struct ioptdesc *iopt = virt_to_ioptdesc(virt);
|
||||
dma_addr_t dma;
|
||||
|
||||
if (WARN_ON(iopt->incoherent))
|
||||
return -EINVAL;
|
||||
|
||||
if (!IOMMU_PAGES_USE_DMA_API) {
|
||||
iommu_pages_flush_incoherent(dma_dev, virt, 0,
|
||||
ioptdesc_mem_size(iopt));
|
||||
} else {
|
||||
dma = dma_map_single(dma_dev, virt, ioptdesc_mem_size(iopt),
|
||||
DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(dma_dev, dma))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* The DMA API is not allowed to do anything other than DMA
|
||||
* direct. It would be nice to also check
|
||||
* dev_is_dma_coherent(dma_dev));
|
||||
*/
|
||||
if (WARN_ON(dma != virt_to_phys(virt))) {
|
||||
dma_unmap_single(dma_dev, dma, ioptdesc_mem_size(iopt),
|
||||
DMA_TO_DEVICE);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
iopt->incoherent = 1;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_pages_start_incoherent);
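As a rough usage sketch (illustrative only, not taken from the patch; the helper name and PTE layout are assumptions): once a table has been set up with iommu_pages_start_incoherent(), later CPU updates only need to flush the bytes that actually changed:

static void example_sync_pte(struct device *dma_dev, void *table,
			     unsigned int pte_index, size_t pte_size)
{
	/* Push one updated entry out to the (non-coherent) table walker. */
	iommu_pages_flush_incoherent(dma_dev, table, pte_index * pte_size,
				     pte_size);
}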
|
||||
|
||||
/**
|
||||
* iommu_pages_start_incoherent_list - Make a list of pages incoherent
|
||||
* @list: The list of pages to setup
|
||||
* @dma_dev: The iommu device
|
||||
*
|
||||
* Perform iommu_pages_start_incoherent() across all of the list.
|
||||
*
|
||||
* If this fails the caller must call iommu_pages_stop_incoherent_list().
|
||||
*/
|
||||
int iommu_pages_start_incoherent_list(struct iommu_pages_list *list,
|
||||
struct device *dma_dev)
|
||||
{
|
||||
struct ioptdesc *cur;
|
||||
int ret;
|
||||
|
||||
list_for_each_entry(cur, &list->pages, iopt_freelist_elm) {
|
||||
if (WARN_ON(cur->incoherent))
|
||||
continue;
|
||||
|
||||
ret = iommu_pages_start_incoherent(
|
||||
folio_address(ioptdesc_folio(cur)), dma_dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_pages_start_incoherent_list);
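A minimal sketch of the caller contract spelled out in the kerneldoc above (the function name is illustrative, not from the patch):

static int example_start_list(struct iommu_pages_list *tables,
			      struct device *dma_dev)
{
	int ret;

	ret = iommu_pages_start_incoherent_list(tables, dma_dev);
	if (ret)
		/* On failure, undo the entries that did get mapped. */
		iommu_pages_stop_incoherent_list(tables, dma_dev);
	return ret;
}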
|
||||
|
||||
/**
|
||||
* iommu_pages_stop_incoherent_list - Undo incoherence across a list
|
||||
* @list: The list of pages to release
|
||||
* @dma_dev: The iommu device
|
||||
*
|
||||
* Revert iommu_pages_start_incoherent() across all of the list. Pages for which
* iommu_pages_start_incoherent() was not called or did not succeed are ignored.
|
||||
*/
|
||||
#if IOMMU_PAGES_USE_DMA_API
|
||||
void iommu_pages_stop_incoherent_list(struct iommu_pages_list *list,
|
||||
struct device *dma_dev)
|
||||
{
|
||||
struct ioptdesc *cur;
|
||||
|
||||
list_for_each_entry(cur, &list->pages, iopt_freelist_elm) {
|
||||
struct folio *folio = ioptdesc_folio(cur);
|
||||
|
||||
if (!cur->incoherent)
|
||||
continue;
|
||||
dma_unmap_single(dma_dev, virt_to_phys(folio_address(folio)),
|
||||
ioptdesc_mem_size(cur), DMA_TO_DEVICE);
|
||||
cur->incoherent = 0;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_pages_stop_incoherent_list);
|
||||
|
||||
/**
|
||||
* iommu_pages_free_incoherent - Free an incoherent page
|
||||
* @virt: virtual address of the page to be freed.
|
||||
* @dma_dev: The iommu device
|
||||
*
|
||||
* If the page is incoherent it is made coherent again and then freed.
|
||||
*/
|
||||
void iommu_pages_free_incoherent(void *virt, struct device *dma_dev)
|
||||
{
|
||||
struct ioptdesc *iopt = virt_to_ioptdesc(virt);
|
||||
|
||||
if (iopt->incoherent) {
|
||||
dma_unmap_single(dma_dev, virt_to_phys(virt),
|
||||
ioptdesc_mem_size(iopt), DMA_TO_DEVICE);
|
||||
iopt->incoherent = 0;
|
||||
}
|
||||
__iommu_free_desc(iopt);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_pages_free_incoherent);
|
||||
#endif
@ -21,7 +21,10 @@ struct ioptdesc {
|
|||
|
||||
struct list_head iopt_freelist_elm;
|
||||
unsigned long __page_mapping;
|
||||
pgoff_t __index;
|
||||
union {
|
||||
u8 incoherent;
|
||||
pgoff_t __index;
|
||||
};
|
||||
void *_private;
|
||||
|
||||
unsigned int __page_type;
|
||||
|
|
@ -98,4 +101,48 @@ static inline void *iommu_alloc_pages_sz(gfp_t gfp, size_t size)
|
|||
return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp, size);
|
||||
}
|
||||
|
||||
#endif /* __IOMMU_PAGES_H */
|
||||
int iommu_pages_start_incoherent(void *virt, struct device *dma_dev);
|
||||
int iommu_pages_start_incoherent_list(struct iommu_pages_list *list,
|
||||
struct device *dma_dev);
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#define IOMMU_PAGES_USE_DMA_API 0
|
||||
#include <linux/cacheflush.h>
|
||||
|
||||
static inline void iommu_pages_flush_incoherent(struct device *dma_dev,
|
||||
void *virt, size_t offset,
|
||||
size_t len)
|
||||
{
|
||||
clflush_cache_range(virt + offset, len);
|
||||
}
|
||||
static inline void
|
||||
iommu_pages_stop_incoherent_list(struct iommu_pages_list *list,
|
||||
struct device *dma_dev)
|
||||
{
|
||||
/*
|
||||
* For performance leave the incoherent flag alone which turns this into
|
||||
* a NOP. For X86 the rest of the stop/free flow ignores the flag.
|
||||
*/
|
||||
}
|
||||
static inline void iommu_pages_free_incoherent(void *virt,
|
||||
struct device *dma_dev)
|
||||
{
|
||||
iommu_free_pages(virt);
|
||||
}
|
||||
#else
|
||||
#define IOMMU_PAGES_USE_DMA_API 1
|
||||
#include <linux/dma-mapping.h>
|
||||
|
||||
static inline void iommu_pages_flush_incoherent(struct device *dma_dev,
|
||||
void *virt, size_t offset,
|
||||
size_t len)
|
||||
{
|
||||
dma_sync_single_for_device(dma_dev, (uintptr_t)virt + offset, len,
|
||||
DMA_TO_DEVICE);
|
||||
}
|
||||
void iommu_pages_stop_incoherent_list(struct iommu_pages_list *list,
|
||||
struct device *dma_dev);
|
||||
void iommu_pages_free_incoherent(void *virt, struct device *dma_dev);
|
||||
#endif
|
||||
|
||||
#endif /* __IOMMU_PAGES_H */
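For context, a sketch of how a non-coherent page-table implementation might drive these helpers end to end (illustrative only; the function names, the SZ_4K table size and the error handling are assumptions, not code from this series):

static void *example_table_alloc(struct device *dma_dev)
{
	void *table = iommu_alloc_pages_sz(GFP_KERNEL, SZ_4K);

	if (!table)
		return NULL;
	/* Flush (x86) or dma_map (other arches) the freshly zeroed table. */
	if (iommu_pages_start_incoherent(table, dma_dev)) {
		iommu_free_pages(table);
		return NULL;
	}
	return table;
}

static void example_table_free(void *table, struct device *dma_dev)
{
	/* Drops the DMA mapping, if one was created, then frees the page. */
	iommu_pages_free_incoherent(table, dma_dev);
}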
@ -100,7 +100,7 @@ static int iommu_bus_notifier(struct notifier_block *nb,
|
|||
unsigned long action, void *data);
|
||||
static void iommu_release_device(struct device *dev);
|
||||
static int __iommu_attach_device(struct iommu_domain *domain,
|
||||
struct device *dev);
|
||||
struct device *dev, struct iommu_domain *old);
|
||||
static int __iommu_attach_group(struct iommu_domain *domain,
|
||||
struct iommu_group *group);
|
||||
static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev,
|
||||
|
|
@ -114,6 +114,7 @@ enum {
|
|||
static int __iommu_device_set_domain(struct iommu_group *group,
|
||||
struct device *dev,
|
||||
struct iommu_domain *new_domain,
|
||||
struct iommu_domain *old_domain,
|
||||
unsigned int flags);
|
||||
static int __iommu_group_set_domain_internal(struct iommu_group *group,
|
||||
struct iommu_domain *new_domain,
|
||||
|
|
@ -542,8 +543,21 @@ static void iommu_deinit_device(struct device *dev)
|
|||
* Regardless, if a delayed attach never occurred, then the release
|
||||
* should still avoid touching any hardware configuration either.
|
||||
*/
|
||||
if (!dev->iommu->attach_deferred && ops->release_domain)
|
||||
ops->release_domain->ops->attach_dev(ops->release_domain, dev);
|
||||
if (!dev->iommu->attach_deferred && ops->release_domain) {
|
||||
struct iommu_domain *release_domain = ops->release_domain;
|
||||
|
||||
/*
|
||||
* If the device requires direct mappings then it should not
|
||||
* be parked on a BLOCKED domain during release as that would
|
||||
* break the direct mappings.
|
||||
*/
|
||||
if (dev->iommu->require_direct && ops->identity_domain &&
|
||||
release_domain == ops->blocked_domain)
|
||||
release_domain = ops->identity_domain;
|
||||
|
||||
release_domain->ops->attach_dev(release_domain, dev,
|
||||
group->domain);
|
||||
}
|
||||
|
||||
if (ops->release_device)
|
||||
ops->release_device(dev);
|
||||
|
|
@ -628,7 +642,8 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
|
|||
if (group->default_domain)
|
||||
iommu_create_device_direct_mappings(group->default_domain, dev);
|
||||
if (group->domain) {
|
||||
ret = __iommu_device_set_domain(group, dev, group->domain, 0);
|
||||
ret = __iommu_device_set_domain(group, dev, group->domain, NULL,
|
||||
0);
|
||||
if (ret)
|
||||
goto err_remove_gdev;
|
||||
} else if (!group->default_domain && !group_list) {
|
||||
|
|
@ -2115,14 +2130,14 @@ static void __iommu_group_set_core_domain(struct iommu_group *group)
|
|||
}
|
||||
|
||||
static int __iommu_attach_device(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev, struct iommu_domain *old)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (unlikely(domain->ops->attach_dev == NULL))
|
||||
return -ENODEV;
|
||||
|
||||
ret = domain->ops->attach_dev(domain, dev);
|
||||
ret = domain->ops->attach_dev(domain, dev, old);
|
||||
if (ret)
|
||||
return ret;
|
||||
dev->iommu->attach_deferred = 0;
|
||||
|
|
@ -2171,7 +2186,7 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
|
|||
int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
|
||||
{
|
||||
if (dev->iommu && dev->iommu->attach_deferred)
|
||||
return __iommu_attach_device(domain, dev);
|
||||
return __iommu_attach_device(domain, dev, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -2284,6 +2299,7 @@ EXPORT_SYMBOL_GPL(iommu_attach_group);
|
|||
static int __iommu_device_set_domain(struct iommu_group *group,
|
||||
struct device *dev,
|
||||
struct iommu_domain *new_domain,
|
||||
struct iommu_domain *old_domain,
|
||||
unsigned int flags)
|
||||
{
|
||||
int ret;
|
||||
|
|
@ -2309,7 +2325,7 @@ static int __iommu_device_set_domain(struct iommu_group *group,
|
|||
dev->iommu->attach_deferred = 0;
|
||||
}
|
||||
|
||||
ret = __iommu_attach_device(new_domain, dev);
|
||||
ret = __iommu_attach_device(new_domain, dev, old_domain);
|
||||
if (ret) {
|
||||
/*
|
||||
* If we have a blocking domain then try to attach that in hopes
|
||||
|
|
@ -2319,7 +2335,8 @@ static int __iommu_device_set_domain(struct iommu_group *group,
|
|||
if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) &&
|
||||
group->blocking_domain &&
|
||||
group->blocking_domain != new_domain)
|
||||
__iommu_attach_device(group->blocking_domain, dev);
|
||||
__iommu_attach_device(group->blocking_domain, dev,
|
||||
old_domain);
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
|
|
@ -2366,7 +2383,7 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group,
|
|||
result = 0;
|
||||
for_each_group_device(group, gdev) {
|
||||
ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
|
||||
flags);
|
||||
group->domain, flags);
|
||||
if (ret) {
|
||||
result = ret;
|
||||
/*
|
||||
|
|
@ -2391,6 +2408,9 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group,
|
|||
*/
|
||||
last_gdev = gdev;
|
||||
for_each_group_device(group, gdev) {
|
||||
/* No need to revert the last gdev that failed to set domain */
|
||||
if (gdev == last_gdev)
|
||||
break;
|
||||
/*
|
||||
* A NULL domain can happen only for first probe, in which case
|
||||
* we leave group->domain as NULL and let release clean
|
||||
|
|
@ -2398,10 +2418,8 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group,
|
|||
*/
|
||||
if (group->domain)
|
||||
WARN_ON(__iommu_device_set_domain(
|
||||
group, gdev->dev, group->domain,
|
||||
group, gdev->dev, group->domain, new_domain,
|
||||
IOMMU_SET_DOMAIN_MUST_SUCCEED));
|
||||
if (gdev == last_gdev)
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ config IOMMUFD_TEST
|
|||
depends on DEBUG_KERNEL
|
||||
depends on FAULT_INJECTION
|
||||
depends on RUNTIME_TESTING_MENU
|
||||
depends on IOMMU_PT_AMDV1
|
||||
select IOMMUFD_DRIVER
|
||||
default n
|
||||
help
|
||||
|
|
|
|||
|
|
@ -31,9 +31,18 @@ enum {
|
|||
IOMMU_TEST_OP_PASID_CHECK_HWPT,
|
||||
};
|
||||
|
||||
enum {
|
||||
MOCK_IOMMUPT_DEFAULT = 0,
|
||||
MOCK_IOMMUPT_HUGE,
|
||||
MOCK_IOMMUPT_AMDV1,
|
||||
};
|
||||
|
||||
/* These values are true for MOCK_IOMMUPT_DEFAULT */
|
||||
enum {
|
||||
MOCK_APERTURE_START = 1UL << 24,
|
||||
MOCK_APERTURE_LAST = (1UL << 31) - 1,
|
||||
MOCK_PAGE_SIZE = 2048,
|
||||
MOCK_HUGE_PAGE_SIZE = 512 * MOCK_PAGE_SIZE,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
|
@ -52,7 +61,6 @@ enum {
|
|||
|
||||
enum {
|
||||
MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0,
|
||||
MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1,
|
||||
MOCK_FLAGS_DEVICE_PASID = 1 << 2,
|
||||
};
|
||||
|
||||
|
|
@ -205,6 +213,7 @@ struct iommu_test_hw_info {
|
|||
*/
|
||||
struct iommu_hwpt_selftest {
|
||||
__u32 iotlb;
|
||||
__u32 pagetable_type;
|
||||
};
|
||||
|
||||
/* Should not be equal to any defined value in enum iommu_hwpt_invalidate_data_type */
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@
|
|||
#include <linux/slab.h>
|
||||
#include <linux/xarray.h>
|
||||
#include <uapi/linux/iommufd.h>
|
||||
#include <linux/generic_pt/iommu.h>
|
||||
#include "../iommu-pages.h"
|
||||
|
||||
#include "../iommu-priv.h"
|
||||
#include "io_pagetable.h"
|
||||
|
|
@ -41,21 +43,6 @@ static DEFINE_IDA(mock_dev_ida);
|
|||
|
||||
enum {
|
||||
MOCK_DIRTY_TRACK = 1,
|
||||
MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2,
|
||||
MOCK_HUGE_PAGE_SIZE = 512 * MOCK_IO_PAGE_SIZE,
|
||||
|
||||
/*
|
||||
* Like a real page table alignment requires the low bits of the address
|
||||
* to be zero. xarray also requires the high bit to be zero, so we store
|
||||
* the pfns shifted. The upper bits are used for metadata.
|
||||
*/
|
||||
MOCK_PFN_MASK = ULONG_MAX / MOCK_IO_PAGE_SIZE,
|
||||
|
||||
_MOCK_PFN_START = MOCK_PFN_MASK + 1,
|
||||
MOCK_PFN_START_IOVA = _MOCK_PFN_START,
|
||||
MOCK_PFN_LAST_IOVA = _MOCK_PFN_START,
|
||||
MOCK_PFN_DIRTY_IOVA = _MOCK_PFN_START << 1,
|
||||
MOCK_PFN_HUGE_IOVA = _MOCK_PFN_START << 2,
|
||||
};
|
||||
|
||||
static int mock_dev_enable_iopf(struct device *dev, struct iommu_domain *domain);
|
||||
|
|
@ -124,10 +111,15 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
|
|||
}
|
||||
|
||||
struct mock_iommu_domain {
|
||||
union {
|
||||
struct iommu_domain domain;
|
||||
struct pt_iommu iommu;
|
||||
struct pt_iommu_amdv1 amdv1;
|
||||
};
|
||||
unsigned long flags;
|
||||
struct iommu_domain domain;
|
||||
struct xarray pfns;
|
||||
};
|
||||
PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, iommu, domain);
|
||||
PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, amdv1.iommu, domain);
|
||||
|
||||
static inline struct mock_iommu_domain *
|
||||
to_mock_domain(struct iommu_domain *domain)
|
||||
|
|
@ -216,7 +208,7 @@ static inline struct selftest_obj *to_selftest_obj(struct iommufd_object *obj)
|
|||
}
|
||||
|
||||
static int mock_domain_nop_attach(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev, struct iommu_domain *old)
|
||||
{
|
||||
struct mock_dev *mdev = to_mock_dev(dev);
|
||||
struct mock_viommu *new_viommu = NULL;
|
||||
|
|
@ -344,74 +336,6 @@ static int mock_domain_set_dirty_tracking(struct iommu_domain *domain,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool mock_test_and_clear_dirty(struct mock_iommu_domain *mock,
|
||||
unsigned long iova, size_t page_size,
|
||||
unsigned long flags)
|
||||
{
|
||||
unsigned long cur, end = iova + page_size - 1;
|
||||
bool dirty = false;
|
||||
void *ent, *old;
|
||||
|
||||
for (cur = iova; cur < end; cur += MOCK_IO_PAGE_SIZE) {
|
||||
ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE);
|
||||
if (!ent || !(xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA))
|
||||
continue;
|
||||
|
||||
dirty = true;
|
||||
/* Clear dirty */
|
||||
if (!(flags & IOMMU_DIRTY_NO_CLEAR)) {
|
||||
unsigned long val;
|
||||
|
||||
val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA;
|
||||
old = xa_store(&mock->pfns, cur / MOCK_IO_PAGE_SIZE,
|
||||
xa_mk_value(val), GFP_KERNEL);
|
||||
WARN_ON_ONCE(ent != old);
|
||||
}
|
||||
}
|
||||
|
||||
return dirty;
|
||||
}
|
||||
|
||||
static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain,
|
||||
unsigned long iova, size_t size,
|
||||
unsigned long flags,
|
||||
struct iommu_dirty_bitmap *dirty)
|
||||
{
|
||||
struct mock_iommu_domain *mock = to_mock_domain(domain);
|
||||
unsigned long end = iova + size;
|
||||
void *ent;
|
||||
|
||||
if (!(mock->flags & MOCK_DIRTY_TRACK) && dirty->bitmap)
|
||||
return -EINVAL;
|
||||
|
||||
do {
|
||||
unsigned long pgsize = MOCK_IO_PAGE_SIZE;
|
||||
unsigned long head;
|
||||
|
||||
ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
|
||||
if (!ent) {
|
||||
iova += pgsize;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (xa_to_value(ent) & MOCK_PFN_HUGE_IOVA)
|
||||
pgsize = MOCK_HUGE_PAGE_SIZE;
|
||||
head = iova & ~(pgsize - 1);
|
||||
|
||||
/* Clear dirty */
|
||||
if (mock_test_and_clear_dirty(mock, head, pgsize, flags))
|
||||
iommu_dirty_bitmap_record(dirty, iova, pgsize);
|
||||
iova += pgsize;
|
||||
} while (iova < end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct iommu_dirty_ops dirty_ops = {
|
||||
.set_dirty_tracking = mock_domain_set_dirty_tracking,
|
||||
.read_and_clear_dirty = mock_domain_read_and_clear_dirty,
|
||||
};
|
||||
|
||||
static struct mock_iommu_domain_nested *
|
||||
__mock_domain_alloc_nested(const struct iommu_user_data *user_data)
|
||||
{
|
||||
|
|
@ -446,7 +370,7 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
|
|||
|
||||
if (flags & ~IOMMU_HWPT_ALLOC_PASID)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
if (!parent || parent->ops != mock_ops.default_domain_ops)
|
||||
if (!parent || !(parent->type & __IOMMU_DOMAIN_PAGING))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
mock_parent = to_mock_domain(parent);
|
||||
|
|
@ -459,6 +383,138 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
|
|||
return &mock_nested->domain;
|
||||
}
|
||||
|
||||
static void mock_domain_free(struct iommu_domain *domain)
|
||||
{
|
||||
struct mock_iommu_domain *mock = to_mock_domain(domain);
|
||||
|
||||
pt_iommu_deinit(&mock->iommu);
|
||||
kfree(mock);
|
||||
}
|
||||
|
||||
static void mock_iotlb_sync(struct iommu_domain *domain,
|
||||
struct iommu_iotlb_gather *gather)
|
||||
{
|
||||
iommu_put_pages_list(&gather->freelist);
|
||||
}
|
||||
|
||||
static const struct iommu_domain_ops amdv1_mock_ops = {
|
||||
IOMMU_PT_DOMAIN_OPS(amdv1_mock),
|
||||
.free = mock_domain_free,
|
||||
.attach_dev = mock_domain_nop_attach,
|
||||
.set_dev_pasid = mock_domain_set_dev_pasid_nop,
|
||||
.iotlb_sync = &mock_iotlb_sync,
|
||||
};
|
||||
|
||||
static const struct iommu_domain_ops amdv1_mock_huge_ops = {
|
||||
IOMMU_PT_DOMAIN_OPS(amdv1_mock),
|
||||
.free = mock_domain_free,
|
||||
.attach_dev = mock_domain_nop_attach,
|
||||
.set_dev_pasid = mock_domain_set_dev_pasid_nop,
|
||||
.iotlb_sync = &mock_iotlb_sync,
|
||||
};
|
||||
#undef pt_iommu_amdv1_mock_map_pages
|
||||
|
||||
static const struct iommu_dirty_ops amdv1_mock_dirty_ops = {
|
||||
IOMMU_PT_DIRTY_OPS(amdv1_mock),
|
||||
.set_dirty_tracking = mock_domain_set_dirty_tracking,
|
||||
};
|
||||
|
||||
static const struct iommu_domain_ops amdv1_ops = {
|
||||
IOMMU_PT_DOMAIN_OPS(amdv1),
|
||||
.free = mock_domain_free,
|
||||
.attach_dev = mock_domain_nop_attach,
|
||||
.set_dev_pasid = mock_domain_set_dev_pasid_nop,
|
||||
.iotlb_sync = &mock_iotlb_sync,
|
||||
};
|
||||
|
||||
static const struct iommu_dirty_ops amdv1_dirty_ops = {
|
||||
IOMMU_PT_DIRTY_OPS(amdv1),
|
||||
.set_dirty_tracking = mock_domain_set_dirty_tracking,
|
||||
};
|
||||
|
||||
static struct mock_iommu_domain *
|
||||
mock_domain_alloc_pgtable(struct device *dev,
|
||||
const struct iommu_hwpt_selftest *user_cfg, u32 flags)
|
||||
{
|
||||
struct mock_iommu_domain *mock;
|
||||
int rc;
|
||||
|
||||
mock = kzalloc(sizeof(*mock), GFP_KERNEL);
|
||||
if (!mock)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
mock->domain.type = IOMMU_DOMAIN_UNMANAGED;
|
||||
|
||||
mock->amdv1.iommu.nid = NUMA_NO_NODE;
|
||||
|
||||
switch (user_cfg->pagetable_type) {
|
||||
case MOCK_IOMMUPT_DEFAULT:
|
||||
case MOCK_IOMMUPT_HUGE: {
|
||||
struct pt_iommu_amdv1_cfg cfg = {};
|
||||
|
||||
/* The mock version has a 2k page size */
|
||||
cfg.common.hw_max_vasz_lg2 = 56;
|
||||
cfg.common.hw_max_oasz_lg2 = 51;
|
||||
cfg.starting_level = 2;
|
||||
if (user_cfg->pagetable_type == MOCK_IOMMUPT_HUGE)
|
||||
mock->domain.ops = &amdv1_mock_huge_ops;
|
||||
else
|
||||
mock->domain.ops = &amdv1_mock_ops;
|
||||
rc = pt_iommu_amdv1_mock_init(&mock->amdv1, &cfg, GFP_KERNEL);
|
||||
if (rc)
|
||||
goto err_free;
|
||||
|
||||
/*
|
||||
* In huge mode userspace should only provide huge pages, but we
|
||||
* have to include PAGE_SIZE for the domain to be accepted by
|
||||
* iommufd.
|
||||
*/
|
||||
if (user_cfg->pagetable_type == MOCK_IOMMUPT_HUGE)
|
||||
mock->domain.pgsize_bitmap = MOCK_HUGE_PAGE_SIZE |
|
||||
PAGE_SIZE;
|
||||
if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
|
||||
mock->domain.dirty_ops = &amdv1_mock_dirty_ops;
|
||||
break;
|
||||
}
|
||||
|
||||
case MOCK_IOMMUPT_AMDV1: {
|
||||
struct pt_iommu_amdv1_cfg cfg = {};
|
||||
|
||||
cfg.common.hw_max_vasz_lg2 = 64;
|
||||
cfg.common.hw_max_oasz_lg2 = 52;
|
||||
cfg.common.features = BIT(PT_FEAT_DYNAMIC_TOP) |
|
||||
BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) |
|
||||
BIT(PT_FEAT_AMDV1_FORCE_COHERENCE);
|
||||
cfg.starting_level = 2;
|
||||
mock->domain.ops = &amdv1_ops;
|
||||
rc = pt_iommu_amdv1_init(&mock->amdv1, &cfg, GFP_KERNEL);
|
||||
if (rc)
|
||||
goto err_free;
|
||||
if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
|
||||
mock->domain.dirty_ops = &amdv1_dirty_ops;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
rc = -EOPNOTSUPP;
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
/*
|
||||
* Override the real aperture to the MOCK aperture for test purposes.
|
||||
*/
|
||||
if (user_cfg->pagetable_type == MOCK_IOMMUPT_DEFAULT) {
|
||||
WARN_ON(mock->domain.geometry.aperture_start != 0);
|
||||
WARN_ON(mock->domain.geometry.aperture_end < MOCK_APERTURE_LAST);
|
||||
|
||||
mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
|
||||
mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
|
||||
}
|
||||
|
||||
return mock;
|
||||
err_free:
|
||||
kfree(mock);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
|
||||
static struct iommu_domain *
|
||||
mock_domain_alloc_paging_flags(struct device *dev, u32 flags,
|
||||
const struct iommu_user_data *user_data)
|
||||
|
|
@ -469,151 +525,30 @@ mock_domain_alloc_paging_flags(struct device *dev, u32 flags,
|
|||
IOMMU_HWPT_ALLOC_PASID;
|
||||
struct mock_dev *mdev = to_mock_dev(dev);
|
||||
bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY;
|
||||
struct iommu_hwpt_selftest user_cfg = {};
|
||||
struct mock_iommu_domain *mock;
|
||||
int rc;
|
||||
|
||||
if (user_data)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
if ((flags & ~PAGING_FLAGS) || (has_dirty_flag && no_dirty_ops))
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
mock = kzalloc(sizeof(*mock), GFP_KERNEL);
|
||||
if (!mock)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
|
||||
mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
|
||||
mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE;
|
||||
if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA)
|
||||
mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE;
|
||||
mock->domain.ops = mock_ops.default_domain_ops;
|
||||
mock->domain.type = IOMMU_DOMAIN_UNMANAGED;
|
||||
xa_init(&mock->pfns);
|
||||
if (user_data && (user_data->type != IOMMU_HWPT_DATA_SELFTEST &&
|
||||
user_data->type != IOMMU_HWPT_DATA_NONE))
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
if (has_dirty_flag)
|
||||
mock->domain.dirty_ops = &dirty_ops;
|
||||
if (user_data) {
|
||||
rc = iommu_copy_struct_from_user(
|
||||
&user_cfg, user_data, IOMMU_HWPT_DATA_SELFTEST, iotlb);
|
||||
if (rc)
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
|
||||
mock = mock_domain_alloc_pgtable(dev, &user_cfg, flags);
|
||||
if (IS_ERR(mock))
|
||||
return ERR_CAST(mock);
|
||||
return &mock->domain;
|
||||
}
|
||||
|
||||
static void mock_domain_free(struct iommu_domain *domain)
|
||||
{
|
||||
struct mock_iommu_domain *mock = to_mock_domain(domain);
|
||||
|
||||
WARN_ON(!xa_empty(&mock->pfns));
|
||||
kfree(mock);
|
||||
}
|
||||
|
||||
static int mock_domain_map_pages(struct iommu_domain *domain,
|
||||
unsigned long iova, phys_addr_t paddr,
|
||||
size_t pgsize, size_t pgcount, int prot,
|
||||
gfp_t gfp, size_t *mapped)
|
||||
{
|
||||
struct mock_iommu_domain *mock = to_mock_domain(domain);
|
||||
unsigned long flags = MOCK_PFN_START_IOVA;
|
||||
unsigned long start_iova = iova;
|
||||
|
||||
/*
|
||||
* xarray does not reliably work with fault injection because it does a
|
||||
* retry allocation, so put our own failure point.
|
||||
*/
|
||||
if (iommufd_should_fail())
|
||||
return -ENOENT;
|
||||
|
||||
WARN_ON(iova % MOCK_IO_PAGE_SIZE);
|
||||
WARN_ON(pgsize % MOCK_IO_PAGE_SIZE);
|
||||
for (; pgcount; pgcount--) {
|
||||
size_t cur;
|
||||
|
||||
for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) {
|
||||
void *old;
|
||||
|
||||
if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize)
|
||||
flags = MOCK_PFN_LAST_IOVA;
|
||||
if (pgsize != MOCK_IO_PAGE_SIZE) {
|
||||
flags |= MOCK_PFN_HUGE_IOVA;
|
||||
}
|
||||
old = xa_store(&mock->pfns, iova / MOCK_IO_PAGE_SIZE,
|
||||
xa_mk_value((paddr / MOCK_IO_PAGE_SIZE) |
|
||||
flags),
|
||||
gfp);
|
||||
if (xa_is_err(old)) {
|
||||
for (; start_iova != iova;
|
||||
start_iova += MOCK_IO_PAGE_SIZE)
|
||||
xa_erase(&mock->pfns,
|
||||
start_iova /
|
||||
MOCK_IO_PAGE_SIZE);
|
||||
return xa_err(old);
|
||||
}
|
||||
WARN_ON(old);
|
||||
iova += MOCK_IO_PAGE_SIZE;
|
||||
paddr += MOCK_IO_PAGE_SIZE;
|
||||
*mapped += MOCK_IO_PAGE_SIZE;
|
||||
flags = 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t mock_domain_unmap_pages(struct iommu_domain *domain,
|
||||
unsigned long iova, size_t pgsize,
|
||||
size_t pgcount,
|
||||
struct iommu_iotlb_gather *iotlb_gather)
|
||||
{
|
||||
struct mock_iommu_domain *mock = to_mock_domain(domain);
|
||||
bool first = true;
|
||||
size_t ret = 0;
|
||||
void *ent;
|
||||
|
||||
WARN_ON(iova % MOCK_IO_PAGE_SIZE);
|
||||
WARN_ON(pgsize % MOCK_IO_PAGE_SIZE);
|
||||
|
||||
for (; pgcount; pgcount--) {
|
||||
size_t cur;
|
||||
|
||||
for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) {
|
||||
ent = xa_erase(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* iommufd generates unmaps that must be a strict
|
||||
* superset of the map's performend So every
|
||||
* starting/ending IOVA should have been an iova passed
|
||||
* to map.
|
||||
*
|
||||
* This simple logic doesn't work when the HUGE_PAGE is
|
||||
* turned on since the core code will automatically
|
||||
* switch between the two page sizes creating a break in
|
||||
* the unmap calls. The break can land in the middle of
|
||||
* contiguous IOVA.
|
||||
*/
|
||||
if (!(domain->pgsize_bitmap & MOCK_HUGE_PAGE_SIZE)) {
|
||||
if (first) {
|
||||
WARN_ON(ent && !(xa_to_value(ent) &
|
||||
MOCK_PFN_START_IOVA));
|
||||
first = false;
|
||||
}
|
||||
if (pgcount == 1 &&
|
||||
cur + MOCK_IO_PAGE_SIZE == pgsize)
|
||||
WARN_ON(ent && !(xa_to_value(ent) &
|
||||
MOCK_PFN_LAST_IOVA));
|
||||
}
|
||||
|
||||
iova += MOCK_IO_PAGE_SIZE;
|
||||
ret += MOCK_IO_PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static phys_addr_t mock_domain_iova_to_phys(struct iommu_domain *domain,
|
||||
dma_addr_t iova)
|
||||
{
|
||||
struct mock_iommu_domain *mock = to_mock_domain(domain);
|
||||
void *ent;
|
||||
|
||||
WARN_ON(iova % MOCK_IO_PAGE_SIZE);
|
||||
ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
|
||||
WARN_ON(!ent);
|
||||
return (xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE;
|
||||
}
|
||||
|
||||
static bool mock_domain_capable(struct device *dev, enum iommu_cap cap)
|
||||
{
|
||||
struct mock_dev *mdev = to_mock_dev(dev);
|
||||
|
|
@ -955,15 +890,6 @@ static const struct iommu_ops mock_ops = {
|
|||
.user_pasid_table = true,
|
||||
.get_viommu_size = mock_get_viommu_size,
|
||||
.viommu_init = mock_viommu_init,
|
||||
.default_domain_ops =
|
||||
&(struct iommu_domain_ops){
|
||||
.free = mock_domain_free,
|
||||
.attach_dev = mock_domain_nop_attach,
|
||||
.map_pages = mock_domain_map_pages,
|
||||
.unmap_pages = mock_domain_unmap_pages,
|
||||
.iova_to_phys = mock_domain_iova_to_phys,
|
||||
.set_dev_pasid = mock_domain_set_dev_pasid_nop,
|
||||
},
|
||||
};
|
||||
|
||||
static void mock_domain_free_nested(struct iommu_domain *domain)
|
||||
|
|
@ -1047,7 +973,7 @@ get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id,
|
|||
if (IS_ERR(hwpt))
|
||||
return hwpt;
|
||||
if (hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED ||
|
||||
hwpt->domain->ops != mock_ops.default_domain_ops) {
|
||||
hwpt->domain->owner != &mock_ops) {
|
||||
iommufd_put_object(ucmd->ictx, &hwpt->obj);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
|
@ -1088,7 +1014,6 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags)
|
|||
{},
|
||||
};
|
||||
const u32 valid_flags = MOCK_FLAGS_DEVICE_NO_DIRTY |
|
||||
MOCK_FLAGS_DEVICE_HUGE_IOVA |
|
||||
MOCK_FLAGS_DEVICE_PASID;
|
||||
struct mock_dev *mdev;
|
||||
int rc, i;
|
||||
|
|
@ -1277,23 +1202,25 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd,
|
|||
{
|
||||
struct iommufd_hw_pagetable *hwpt;
|
||||
struct mock_iommu_domain *mock;
|
||||
unsigned int page_size;
|
||||
uintptr_t end;
|
||||
int rc;
|
||||
|
||||
if (iova % MOCK_IO_PAGE_SIZE || length % MOCK_IO_PAGE_SIZE ||
|
||||
(uintptr_t)uptr % MOCK_IO_PAGE_SIZE ||
|
||||
check_add_overflow((uintptr_t)uptr, (uintptr_t)length, &end))
|
||||
return -EINVAL;
|
||||
|
||||
hwpt = get_md_pagetable(ucmd, mockpt_id, &mock);
|
||||
if (IS_ERR(hwpt))
|
||||
return PTR_ERR(hwpt);
|
||||
|
||||
for (; length; length -= MOCK_IO_PAGE_SIZE) {
|
||||
page_size = 1 << __ffs(mock->domain.pgsize_bitmap);
|
||||
if (iova % page_size || length % page_size ||
|
||||
(uintptr_t)uptr % page_size ||
|
||||
check_add_overflow((uintptr_t)uptr, (uintptr_t)length, &end))
|
||||
return -EINVAL;
|
||||
|
||||
for (; length; length -= page_size) {
|
||||
struct page *pages[1];
|
||||
phys_addr_t io_phys;
|
||||
unsigned long pfn;
|
||||
long npages;
|
||||
void *ent;
|
||||
|
||||
npages = get_user_pages_fast((uintptr_t)uptr & PAGE_MASK, 1, 0,
|
||||
pages);
|
||||
|
|
@ -1308,15 +1235,14 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd,
|
|||
pfn = page_to_pfn(pages[0]);
|
||||
put_page(pages[0]);
|
||||
|
||||
ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
|
||||
if (!ent ||
|
||||
(xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE !=
|
||||
pfn * PAGE_SIZE + ((uintptr_t)uptr % PAGE_SIZE)) {
|
||||
io_phys = mock->domain.ops->iova_to_phys(&mock->domain, iova);
|
||||
if (io_phys !=
|
||||
pfn * PAGE_SIZE + ((uintptr_t)uptr % PAGE_SIZE)) {
|
||||
rc = -EINVAL;
|
||||
goto out_put;
|
||||
}
|
||||
iova += MOCK_IO_PAGE_SIZE;
|
||||
uptr += MOCK_IO_PAGE_SIZE;
|
||||
iova += page_size;
|
||||
uptr += page_size;
|
||||
}
|
||||
rc = 0;
|
||||
|
||||
|
|
@ -1795,7 +1721,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
|
|||
if (IS_ERR(hwpt))
|
||||
return PTR_ERR(hwpt);
|
||||
|
||||
if (!(mock->flags & MOCK_DIRTY_TRACK)) {
|
||||
if (!(mock->flags & MOCK_DIRTY_TRACK) || !mock->iommu.ops->set_dirty) {
|
||||
rc = -EINVAL;
|
||||
goto out_put;
|
||||
}
|
||||
|
|
@ -1814,22 +1740,10 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
|
|||
}
|
||||
|
||||
for (i = 0; i < max; i++) {
|
||||
unsigned long cur = iova + i * page_size;
|
||||
void *ent, *old;
|
||||
|
||||
if (!test_bit(i, (unsigned long *)tmp))
|
||||
continue;
|
||||
|
||||
ent = xa_load(&mock->pfns, cur / page_size);
|
||||
if (ent) {
|
||||
unsigned long val;
|
||||
|
||||
val = xa_to_value(ent) | MOCK_PFN_DIRTY_IOVA;
|
||||
old = xa_store(&mock->pfns, cur / page_size,
|
||||
xa_mk_value(val), GFP_KERNEL);
|
||||
WARN_ON_ONCE(ent != old);
|
||||
count++;
|
||||
}
|
||||
mock->iommu.ops->set_dirty(&mock->iommu, iova + i * page_size);
|
||||
count++;
|
||||
}
|
||||
|
||||
cmd->dirty.out_nr_dirty = count;
|
||||
|
|
@ -2202,3 +2116,5 @@ void iommufd_test_exit(void)
|
|||
platform_device_unregister(selftest_iommu_dev);
|
||||
debugfs_remove_recursive(dbgfs_root);
|
||||
}
|
||||
|
||||
MODULE_IMPORT_NS("GENERIC_PT_IOMMU");
|
||||
|
|
|
|||
|
|
@ -590,7 +590,7 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
|
|||
}
|
||||
|
||||
static int ipmmu_attach_device(struct iommu_domain *io_domain,
|
||||
struct device *dev)
|
||||
struct device *dev, struct iommu_domain *old)
|
||||
{
|
||||
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
|
||||
struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
|
||||
|
|
@ -637,17 +637,17 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
|
|||
}
|
||||
|
||||
static int ipmmu_iommu_identity_attach(struct iommu_domain *identity_domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct iommu_domain *io_domain = iommu_get_domain_for_dev(dev);
|
||||
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
|
||||
struct ipmmu_vmsa_domain *domain;
|
||||
unsigned int i;
|
||||
|
||||
if (io_domain == identity_domain || !io_domain)
|
||||
if (old == identity_domain || !old)
|
||||
return 0;
|
||||
|
||||
domain = to_vmsa_domain(io_domain);
|
||||
domain = to_vmsa_domain(old);
|
||||
for (i = 0; i < fwspec->num_ids; ++i)
|
||||
ipmmu_utlb_disable(domain, fwspec->ids[i]);
|
||||
|
||||
|
|
@ -720,6 +720,8 @@ static int ipmmu_init_platform_device(struct device *dev,
|
|||
|
||||
dev_iommu_priv_set(dev, platform_get_drvdata(ipmmu_pdev));
|
||||
|
||||
put_device(&ipmmu_pdev->dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -391,7 +391,8 @@ static struct iommu_device *msm_iommu_probe_device(struct device *dev)
|
|||
return &iommu->iommu;
|
||||
}
|
||||
|
||||
static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
||||
static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned long flags;
|
||||
|
|
@ -441,19 +442,19 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
|||
}
|
||||
|
||||
static int msm_iommu_identity_attach(struct iommu_domain *identity_domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
|
||||
struct msm_priv *priv;
|
||||
unsigned long flags;
|
||||
struct msm_iommu_dev *iommu;
|
||||
struct msm_iommu_ctx_dev *master;
|
||||
int ret = 0;
|
||||
|
||||
if (domain == identity_domain || !domain)
|
||||
if (old == identity_domain || !old)
|
||||
return 0;
|
||||
|
||||
priv = to_msm_priv(domain);
|
||||
priv = to_msm_priv(old);
|
||||
free_io_pgtable_ops(priv->iop);
|
||||
|
||||
spin_lock_irqsave(&msm_iommu_lock, flags);
|
||||
|
|
|
|||
|
|
@ -139,6 +139,7 @@
|
|||
/* 2 bits: iommu type */
|
||||
#define MTK_IOMMU_TYPE_MM (0x0 << 13)
|
||||
#define MTK_IOMMU_TYPE_INFRA (0x1 << 13)
|
||||
#define MTK_IOMMU_TYPE_APU (0x2 << 13)
|
||||
#define MTK_IOMMU_TYPE_MASK (0x3 << 13)
|
||||
/* PM and clock always on. e.g. infra iommu */
|
||||
#define PM_CLK_AO BIT(15)
|
||||
|
|
@ -147,6 +148,7 @@
|
|||
#define TF_PORT_TO_ADDR_MT8173 BIT(18)
|
||||
#define INT_ID_PORT_WIDTH_6 BIT(19)
|
||||
#define CFG_IFA_MASTER_IN_ATF BIT(20)
|
||||
#define DL_WITH_MULTI_LARB BIT(21)
|
||||
|
||||
#define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \
|
||||
((((pdata)->flags) & (mask)) == (_x))
|
||||
|
|
@ -172,6 +174,7 @@ enum mtk_iommu_plat {
|
|||
M4U_MT8183,
|
||||
M4U_MT8186,
|
||||
M4U_MT8188,
|
||||
M4U_MT8189,
|
||||
M4U_MT8192,
|
||||
M4U_MT8195,
|
||||
M4U_MT8365,
|
||||
|
|
@ -335,6 +338,8 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data, unsigned int ban
|
|||
*/
|
||||
#define MTK_IOMMU_4GB_MODE_REMAP_BASE 0x140000000UL
|
||||
|
||||
static LIST_HEAD(apulist); /* List the apu iommu HWs */
|
||||
static LIST_HEAD(infralist); /* List the iommu_infra HW */
|
||||
static LIST_HEAD(m4ulist); /* List all the M4U HWs */
|
||||
|
||||
#define for_each_m4u(data, head) list_for_each_entry(data, head, list)
|
||||
|
|
@ -350,6 +355,15 @@ static const struct mtk_iommu_iova_region single_domain[] = {
|
|||
#define MT8192_MULTI_REGION_NR (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) ? \
|
||||
MT8192_MULTI_REGION_NR_MAX : 1)
|
||||
|
||||
static const struct mtk_iommu_iova_region mt8189_multi_dom_apu[] = {
|
||||
{ .iova_base = 0x200000ULL, .size = SZ_512M}, /* APU SECURE */
|
||||
#if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT)
|
||||
{ .iova_base = SZ_1G, .size = 0xc0000000}, /* APU CODE */
|
||||
{ .iova_base = 0x70000000ULL, .size = 0x12600000}, /* APU VLM */
|
||||
{ .iova_base = SZ_4G, .size = SZ_4G * 3}, /* APU VPU */
|
||||
#endif
|
||||
};
|
||||
|
||||
static const struct mtk_iommu_iova_region mt8192_multi_dom[MT8192_MULTI_REGION_NR] = {
|
||||
{ .iova_base = 0x0, .size = MTK_IOMMU_IOVA_SZ_4G}, /* 0 ~ 4G, */
|
||||
#if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT)
|
||||
|
|
@ -705,7 +719,7 @@ static void mtk_iommu_domain_free(struct iommu_domain *domain)
|
|||
}
|
||||
|
||||
static int mtk_iommu_attach_device(struct iommu_domain *domain,
|
||||
struct device *dev)
|
||||
struct device *dev, struct iommu_domain *old)
|
||||
{
|
||||
struct mtk_iommu_data *data = dev_iommu_priv_get(dev), *frstdata;
|
||||
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
|
||||
|
|
@ -773,12 +787,12 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
|
|||
}
|
||||
|
||||
static int mtk_iommu_identity_attach(struct iommu_domain *identity_domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
|
||||
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
|
||||
|
||||
if (domain == identity_domain || !domain)
|
||||
if (old == identity_domain || !old)
|
||||
return 0;
|
||||
|
||||
mtk_iommu_config(data, dev, false, 0);
|
||||
|
|
@ -865,6 +879,7 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev)
|
|||
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
|
||||
struct device_link *link;
|
||||
struct device *larbdev;
|
||||
unsigned long larbid_msk = 0;
|
||||
unsigned int larbid, larbidx, i;
|
||||
|
||||
if (!MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
|
||||
|
|
@ -872,30 +887,50 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev)
|
|||
|
||||
/*
|
||||
* Link the consumer device with the smi-larb device(supplier).
|
||||
* The device that connects with each larb is an independent HW.
* All the ports in each device should be in the same larbs.
|
||||
* w/DL_WITH_MULTI_LARB: the master may connect with multi larbs,
|
||||
* we should create device link with each larb.
|
||||
* w/o DL_WITH_MULTI_LARB: the master must connect with one larb,
|
||||
* otherwise fail.
|
||||
*/
|
||||
larbid = MTK_M4U_TO_LARB(fwspec->ids[0]);
|
||||
if (larbid >= MTK_LARB_NR_MAX)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
larbid_msk |= BIT(larbid);
|
||||
|
||||
for (i = 1; i < fwspec->num_ids; i++) {
|
||||
larbidx = MTK_M4U_TO_LARB(fwspec->ids[i]);
|
||||
if (larbid != larbidx) {
|
||||
if (MTK_IOMMU_HAS_FLAG(data->plat_data, DL_WITH_MULTI_LARB)) {
|
||||
larbid_msk |= BIT(larbidx);
|
||||
} else if (larbid != larbidx) {
|
||||
dev_err(dev, "Can only use one larb. Fail@larb%d-%d.\n",
|
||||
larbid, larbidx);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
}
|
||||
larbdev = data->larb_imu[larbid].dev;
|
||||
if (!larbdev)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
link = device_link_add(dev, larbdev,
|
||||
DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
|
||||
if (!link)
|
||||
dev_err(dev, "Unable to link %s\n", dev_name(larbdev));
|
||||
for_each_set_bit(larbid, &larbid_msk, 32) {
|
||||
larbdev = data->larb_imu[larbid].dev;
|
||||
if (!larbdev)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
link = device_link_add(dev, larbdev,
|
||||
DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
|
||||
if (!link) {
|
||||
dev_err(dev, "Unable to link %s\n", dev_name(larbdev));
|
||||
goto link_remove;
|
||||
}
|
||||
}
|
||||
|
||||
return &data->iommu;
|
||||
|
||||
link_remove:
|
||||
for_each_set_bit(i, &larbid_msk, larbid) {
|
||||
larbdev = data->larb_imu[i].dev;
|
||||
device_link_remove(dev, larbdev);
|
||||
}
|
||||
|
||||
return ERR_PTR(-ENODEV);
|
||||
}
|
||||
|
||||
static void mtk_iommu_release_device(struct device *dev)
|
||||
|
|
@ -903,11 +938,19 @@ static void mtk_iommu_release_device(struct device *dev)
|
|||
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
|
||||
struct mtk_iommu_data *data;
|
||||
struct device *larbdev;
|
||||
unsigned int larbid;
|
||||
unsigned int larbid, i;
|
||||
unsigned long larbid_msk = 0;
|
||||
|
||||
data = dev_iommu_priv_get(dev);
|
||||
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
|
||||
larbid = MTK_M4U_TO_LARB(fwspec->ids[0]);
|
||||
if (!MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
|
||||
return;
|
||||
|
||||
for (i = 0; i < fwspec->num_ids; i++) {
|
||||
larbid = MTK_M4U_TO_LARB(fwspec->ids[i]);
|
||||
larbid_msk |= BIT(larbid);
|
||||
}
|
||||
|
||||
for_each_set_bit(larbid, &larbid_msk, 32) {
|
||||
larbdev = data->larb_imu[larbid].dev;
|
||||
device_link_remove(dev, larbdev);
|
||||
}
|
||||
|
|
@ -974,6 +1017,8 @@ static int mtk_iommu_of_xlate(struct device *dev,
|
|||
return -EINVAL;
|
||||
|
||||
dev_iommu_priv_set(dev, platform_get_drvdata(m4updev));
|
||||
|
||||
put_device(&m4updev->dev);
|
||||
}
|
||||
|
||||
return iommu_fwspec_add_ids(dev, args->args, 1);
|
||||
|
|
@ -1211,16 +1256,19 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
|
|||
}
|
||||
|
||||
component_match_add(dev, match, component_compare_dev, &plarbdev->dev);
|
||||
platform_device_put(plarbdev);
|
||||
}
|
||||
|
||||
if (!frst_avail_smicomm_node)
|
||||
return -EINVAL;
|
||||
if (!frst_avail_smicomm_node) {
|
||||
ret = -EINVAL;
|
||||
goto err_larbdev_put;
|
||||
}
|
||||
|
||||
pcommdev = of_find_device_by_node(frst_avail_smicomm_node);
|
||||
of_node_put(frst_avail_smicomm_node);
|
||||
if (!pcommdev)
|
||||
return -ENODEV;
|
||||
if (!pcommdev) {
|
||||
ret = -ENODEV;
|
||||
goto err_larbdev_put;
|
||||
}
|
||||
data->smicomm_dev = &pcommdev->dev;
|
||||
|
||||
link = device_link_add(data->smicomm_dev, dev,
|
||||
|
|
@ -1228,16 +1276,16 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
|
|||
platform_device_put(pcommdev);
|
||||
if (!link) {
|
||||
dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev));
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto err_larbdev_put;
|
||||
}
|
||||
return 0;
|
||||
|
||||
err_larbdev_put:
|
||||
for (i = MTK_LARB_NR_MAX - 1; i >= 0; i--) {
|
||||
if (!data->larb_imu[i].dev)
|
||||
continue;
|
||||
/* id mapping may not be linear, loop the whole array */
|
||||
for (i = 0; i < MTK_LARB_NR_MAX; i++)
|
||||
put_device(data->larb_imu[i].dev);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
@ -1400,8 +1448,12 @@ static int mtk_iommu_probe(struct platform_device *pdev)
|
|||
iommu_device_sysfs_remove(&data->iommu);
|
||||
out_list_del:
|
||||
list_del(&data->list);
|
||||
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
|
||||
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
|
||||
device_link_remove(data->smicomm_dev, dev);
|
||||
|
||||
for (i = 0; i < MTK_LARB_NR_MAX; i++)
|
||||
put_device(data->larb_imu[i].dev);
|
||||
}
|
||||
out_runtime_disable:
|
||||
pm_runtime_disable(dev);
|
||||
return ret;
|
||||
|
|
@ -1421,6 +1473,9 @@ static void mtk_iommu_remove(struct platform_device *pdev)
|
|||
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
|
||||
device_link_remove(data->smicomm_dev, &pdev->dev);
|
||||
component_master_del(&pdev->dev, &mtk_iommu_com_ops);
|
||||
|
||||
for (i = 0; i < MTK_LARB_NR_MAX; i++)
|
||||
put_device(data->larb_imu[i].dev);
|
||||
}
|
||||
pm_runtime_disable(&pdev->dev);
|
||||
for (i = 0; i < data->plat_data->banks_num; i++) {
|
||||
|
|
@ -1695,6 +1750,66 @@ static const struct mtk_iommu_plat_data mt8188_data_vpp = {
|
|||
27, 28 /* ccu0 */, MTK_INVALID_LARBID}, {4, 6}},
|
||||
};
|
||||
|
||||
static const unsigned int mt8189_apu_region_msk[][MTK_LARB_NR_MAX] = {
|
||||
[0] = {[0] = BIT(2)}, /* Region0: fake larb 0 APU_SECURE */
|
||||
[1] = {[0] = BIT(1)}, /* Region1: fake larb 0 APU_CODE */
|
||||
[2] = {[0] = BIT(3)}, /* Region2: fake larb 0 APU_VLM */
|
||||
[3] = {[0] = BIT(0)}, /* Region3: fake larb 0 APU_DATA */
|
||||
};
|
||||
|
||||
static const struct mtk_iommu_plat_data mt8189_data_apu = {
|
||||
.m4u_plat = M4U_MT8189,
|
||||
.flags = IOVA_34_EN | DCM_DISABLE |
|
||||
MTK_IOMMU_TYPE_APU | PGTABLE_PA_35_EN,
|
||||
.hw_list = &apulist,
|
||||
.inv_sel_reg = REG_MMU_INV_SEL_GEN2,
|
||||
.banks_num = 1,
|
||||
.banks_enable = {true},
|
||||
.iova_region = mt8189_multi_dom_apu,
|
||||
.iova_region_nr = ARRAY_SIZE(mt8189_multi_dom_apu),
|
||||
.larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}},
|
||||
.iova_region_larb_msk = mt8189_apu_region_msk,
|
||||
};
|
||||
|
||||
static const struct mtk_iommu_plat_data mt8189_data_infra = {
|
||||
.m4u_plat = M4U_MT8189,
|
||||
.flags = WR_THROT_EN | DCM_DISABLE | MTK_IOMMU_TYPE_INFRA |
|
||||
CFG_IFA_MASTER_IN_ATF | SHARE_PGTABLE | PGTABLE_PA_35_EN,
|
||||
.hw_list = &infralist,
|
||||
.banks_num = 1,
|
||||
.banks_enable = {true},
|
||||
.inv_sel_reg = REG_MMU_INV_SEL_GEN2,
|
||||
.iova_region = single_domain,
|
||||
.iova_region_nr = ARRAY_SIZE(single_domain),
|
||||
};
|
||||
|
||||
static const u32 mt8189_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = {
|
||||
[0] = {~0, ~0, ~0, [22] = BIT(0)}, /* Region0: all ports for larb0/1/2 */
|
||||
[1] = {[3] = ~0, [4] = ~0}, /* Region1: all ports for larb4(3)/7(4) */
|
||||
[2] = {[5] = ~0, [6] = ~0, /* Region2: all ports for larb9(5)/11(6) */
|
||||
[7] = ~0, [8] = ~0, /* Region2: all ports for larb13(7)/14(8) */
|
||||
[9] = ~0, [10] = ~0, /* Region2: all ports for larb16(9)/17(10) */
|
||||
[11] = ~0, [12] = ~0, /* Region2: all ports for larb19(11)/20(12) */
|
||||
[21] = ~0}, /* Region2: larb21 fake GCE larb */
|
||||
};
|
||||
|
||||
static const struct mtk_iommu_plat_data mt8189_data_mm = {
|
||||
.m4u_plat = M4U_MT8189,
|
||||
.flags = HAS_BCLK | HAS_SUB_COMM_3BITS | OUT_ORDER_WR_EN |
|
||||
WR_THROT_EN | IOVA_34_EN | MTK_IOMMU_TYPE_MM |
|
||||
PGTABLE_PA_35_EN | DL_WITH_MULTI_LARB,
|
||||
.hw_list = &m4ulist,
|
||||
.inv_sel_reg = REG_MMU_INV_SEL_GEN2,
|
||||
.banks_num = 5,
|
||||
.banks_enable = {true, false, false, false, false},
|
||||
.iova_region = mt8192_multi_dom,
|
||||
.iova_region_nr = ARRAY_SIZE(mt8192_multi_dom),
|
||||
.iova_region_larb_msk = mt8189_larb_region_msk,
|
||||
.larbid_remap = {{0}, {1}, {21/* GCE_D */, 21/* GCE_M */, 2},
|
||||
{19, 20, 9, 11}, {7}, {4},
|
||||
{13, 17}, {14, 16}},
|
||||
};
|
||||
|
||||
static const struct mtk_iommu_plat_data mt8192_data = {
|
||||
.m4u_plat = M4U_MT8192,
|
||||
.flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN |
|
||||
|
|
@ -1796,6 +1911,9 @@ static const struct of_device_id mtk_iommu_of_ids[] = {
|
|||
{ .compatible = "mediatek,mt8188-iommu-infra", .data = &mt8188_data_infra},
|
||||
{ .compatible = "mediatek,mt8188-iommu-vdo", .data = &mt8188_data_vdo},
|
||||
{ .compatible = "mediatek,mt8188-iommu-vpp", .data = &mt8188_data_vpp},
|
||||
{ .compatible = "mediatek,mt8189-iommu-apu", .data = &mt8189_data_apu},
|
||||
{ .compatible = "mediatek,mt8189-iommu-infra", .data = &mt8189_data_infra},
|
||||
{ .compatible = "mediatek,mt8189-iommu-mm", .data = &mt8189_data_mm},
|
||||
{ .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data},
|
||||
{ .compatible = "mediatek,mt8195-iommu-infra", .data = &mt8195_data_infra},
|
||||
{ .compatible = "mediatek,mt8195-iommu-vdo", .data = &mt8195_data_vdo},
|
||||
|
|
|
|||
|
|
@ -303,7 +303,9 @@ static void mtk_iommu_v1_domain_free(struct iommu_domain *domain)
|
|||
kfree(to_mtk_domain(domain));
|
||||
}
|
||||
|
||||
static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device *dev)
|
||||
static int mtk_iommu_v1_attach_device(struct iommu_domain *domain,
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev);
|
||||
struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain);
|
||||
|
|
@ -329,7 +331,8 @@ static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device
|
|||
}
|
||||
|
||||
static int mtk_iommu_v1_identity_attach(struct iommu_domain *identity_domain,
|
||||
struct device *dev)
|
||||
struct device *dev,
|
||||
struct iommu_domain *old)
|
||||
{
|
||||
struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev);
|
||||
|
||||
|
|
@ -435,6 +438,8 @@ static int mtk_iommu_v1_create_mapping(struct device *dev,
|
|||
return -EINVAL;
|
||||
|
||||
dev_iommu_priv_set(dev, platform_get_drvdata(m4updev));
|
||||
|
||||
put_device(&m4updev->dev);
|
||||
}
|
||||
|
||||
ret = iommu_fwspec_add_ids(dev, args->args, 1);
|
||||
|
|
@ -641,13 +646,18 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev)
|
|||
if (larb_nr < 0)
|
||||
return larb_nr;
|
||||
|
||||
if (larb_nr > MTK_LARB_NR_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < larb_nr; i++) {
|
||||
struct device_node *larbnode;
|
||||
struct platform_device *plarbdev;
|
||||
|
||||
larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
|
||||
if (!larbnode)
|
||||
return -EINVAL;
|
||||
if (!larbnode) {
|
||||
ret = -EINVAL;
|
||||
goto out_put_larbs;
|
||||
}
|
||||
|
||||
if (!of_device_is_available(larbnode)) {
|
||||
of_node_put(larbnode);
|
||||
|
|
@ -657,11 +667,14 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev)
|
|||
plarbdev = of_find_device_by_node(larbnode);
|
||||
if (!plarbdev) {
|
||||
of_node_put(larbnode);
|
||||
return -ENODEV;
|
||||
ret = -ENODEV;
|
||||
goto out_put_larbs;
|
||||
}
|
||||
if (!plarbdev->dev.driver) {
|
||||
of_node_put(larbnode);
|
||||
return -EPROBE_DEFER;
|
||||
put_device(&plarbdev->dev);
|
||||
ret = -EPROBE_DEFER;
|
||||
goto out_put_larbs;
|
||||
}
|
||||
data->larb_imu[i].dev = &plarbdev->dev;
|
||||
|
||||
|
|
@ -673,7 +686,7 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev)
|
|||
|
||||
ret = mtk_iommu_v1_hw_init(data);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out_put_larbs;
|
||||
|
||||
ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL,
|
||||
dev_name(&pdev->dev));
|
||||
|
|
@ -695,12 +708,17 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev)
|
|||
iommu_device_sysfs_remove(&data->iommu);
|
||||
out_clk_unprepare:
|
||||
clk_disable_unprepare(data->bclk);
|
||||
out_put_larbs:
|
||||
for (i = 0; i < MTK_LARB_NR_MAX; i++)
|
||||
put_device(data->larb_imu[i].dev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void mtk_iommu_v1_remove(struct platform_device *pdev)
|
||||
{
|
||||
struct mtk_iommu_v1_data *data = platform_get_drvdata(pdev);
|
||||
int i;
|
||||
|
||||
iommu_device_sysfs_remove(&data->iommu);
|
||||
iommu_device_unregister(&data->iommu);
|
||||
|
|
@ -708,6 +726,9 @@ static void mtk_iommu_v1_remove(struct platform_device *pdev)
|
|||
clk_disable_unprepare(data->bclk);
|
||||
devm_free_irq(&pdev->dev, data->irq, data);
|
||||
component_master_del(&pdev->dev, &mtk_iommu_v1_com_ops);
|
||||
|
||||
for (i = 0; i < MTK_LARB_NR_MAX; i++)
|
||||
put_device(data->larb_imu[i].dev);
|
||||
}
|
||||
|
||||
static int __maybe_unused mtk_iommu_v1_suspend(struct device *dev)
|
||||
|
|
|
|||
|
|
@@ -1431,8 +1431,8 @@ static void omap_iommu_detach_fini(struct omap_iommu_domain *odomain)
 	odomain->iommus = NULL;
 }

-static int
-omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
+static int omap_iommu_attach_dev(struct iommu_domain *domain,
+				 struct device *dev, struct iommu_domain *old)
 {
 	struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
 	struct omap_iommu_domain *omap_domain = to_omap_domain(domain);

@@ -1536,15 +1536,15 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
 }

 static int omap_iommu_identity_attach(struct iommu_domain *identity_domain,
-				      struct device *dev)
+				      struct device *dev,
+				      struct iommu_domain *old)
 {
-	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 	struct omap_iommu_domain *omap_domain;

-	if (domain == identity_domain || !domain)
+	if (old == identity_domain || !old)
 		return 0;

-	omap_domain = to_omap_domain(domain);
+	omap_domain = to_omap_domain(old);
 	spin_lock(&omap_domain->lock);
 	_omap_iommu_detach_dev(omap_domain, dev);
 	spin_unlock(&omap_domain->lock);

@@ -1668,23 +1668,20 @@ static struct iommu_device *omap_iommu_probe_device(struct device *dev)
 		}

 		pdev = of_find_device_by_node(np);
+		of_node_put(np);
 		if (!pdev) {
-			of_node_put(np);
 			kfree(arch_data);
 			return ERR_PTR(-ENODEV);
 		}

 		oiommu = platform_get_drvdata(pdev);
+		put_device(&pdev->dev);
 		if (!oiommu) {
-			of_node_put(np);
 			kfree(arch_data);
 			return ERR_PTR(-EINVAL);
 		}

 		tmp->iommu_dev = oiommu;
-		tmp->dev = &pdev->dev;
-
-		of_node_put(np);
 	}

 	dev_iommu_priv_set(dev, arch_data);
@@ -88,7 +88,6 @@ struct omap_iommu {
 /**
  * struct omap_iommu_arch_data - omap iommu private data
  * @iommu_dev: handle of the OMAP iommu device
- * @dev: handle of the iommu device
  *
  * This is an omap iommu private data object, which binds an iommu user
  * to its iommu device. This object should be placed at the iommu user's

@@ -97,7 +96,6 @@ struct omap_iommu {
  */
 struct omap_iommu_arch_data {
 	struct omap_iommu *iommu_dev;
-	struct device *dev;
 };

 struct cr_regs {
@@ -1321,7 +1321,8 @@ static bool riscv_iommu_pt_supported(struct riscv_iommu_device *iommu, int pgd_m
 }

 static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
-					    struct device *dev)
+					    struct device *dev,
+					    struct iommu_domain *old)
 {
 	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
 	struct riscv_iommu_device *iommu = dev_to_iommu(dev);

@@ -1426,7 +1427,8 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
 }

 static int riscv_iommu_attach_blocking_domain(struct iommu_domain *iommu_domain,
-					      struct device *dev)
+					      struct device *dev,
+					      struct iommu_domain *old)
 {
 	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
 	struct riscv_iommu_info *info = dev_iommu_priv_get(dev);

@@ -1447,7 +1449,8 @@ static struct iommu_domain riscv_iommu_blocking_domain = {
 };

 static int riscv_iommu_attach_identity_domain(struct iommu_domain *iommu_domain,
-					      struct device *dev)
+					      struct device *dev,
+					      struct iommu_domain *old)
 {
 	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
 	struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
@@ -960,7 +960,8 @@ static int rk_iommu_enable(struct rk_iommu *iommu)
 }

 static int rk_iommu_identity_attach(struct iommu_domain *identity_domain,
-				    struct device *dev)
+				    struct device *dev,
+				    struct iommu_domain *old)
 {
 	struct rk_iommu *iommu;
 	struct rk_iommu_domain *rk_domain;

@@ -1005,7 +1006,7 @@ static struct iommu_domain rk_identity_domain = {
 };

 static int rk_iommu_attach_device(struct iommu_domain *domain,
-		struct device *dev)
+		struct device *dev, struct iommu_domain *old)
 {
 	struct rk_iommu *iommu;
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);

@@ -1026,7 +1027,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 	if (iommu->domain == domain)
 		return 0;

-	ret = rk_iommu_identity_attach(&rk_identity_domain, dev);
+	ret = rk_iommu_identity_attach(&rk_identity_domain, dev, old);
 	if (ret)
 		return ret;

@@ -1041,8 +1042,17 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 		return 0;

 	ret = rk_iommu_enable(iommu);
-	if (ret)
-		WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev));
+	if (ret) {
+		/*
+		 * Note rk_iommu_identity_attach() might fail before physically
+		 * attaching the dev to iommu->domain, in which case the actual
+		 * old domain for this revert should be rk_identity_domain v.s.
+		 * iommu->domain. Since rk_iommu_identity_attach() does not care
+		 * about the old domain argument for now, this is not a problem.
+		 */
+		WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev,
+						 iommu->domain));
+	}

 	pm_runtime_put(iommu->dev);
@@ -670,7 +670,8 @@ int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status)
 }

 static int blocking_domain_attach_device(struct iommu_domain *domain,
-					 struct device *dev)
+					 struct device *dev,
+					 struct iommu_domain *old)
 {
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	struct s390_domain *s390_domain;

@@ -694,7 +695,8 @@ static int blocking_domain_attach_device(struct iommu_domain *domain,
 }

 static int s390_iommu_attach_device(struct iommu_domain *domain,
-				    struct device *dev)
+				    struct device *dev,
+				    struct iommu_domain *old)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	struct zpci_dev *zdev = to_zpci_dev(dev);

@@ -709,7 +711,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 		    domain->geometry.aperture_end < zdev->start_dma))
 		return -EINVAL;

-	blocking_domain_attach_device(&blocking_domain, dev);
+	blocking_domain_attach_device(&blocking_domain, dev, old);

 	/* If we fail now DMA remains blocked via blocking domain */
 	cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);

@@ -1131,13 +1133,14 @@ static int __init s390_iommu_init(void)
 subsys_initcall(s390_iommu_init);

 static int s390_attach_dev_identity(struct iommu_domain *domain,
-				    struct device *dev)
+				    struct device *dev,
+				    struct iommu_domain *old)
 {
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	u8 status;
 	int cc;

-	blocking_domain_attach_device(&blocking_domain, dev);
+	blocking_domain_attach_device(&blocking_domain, dev, old);

 	/* If we fail now DMA remains blocked via blocking domain */
 	cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
@@ -247,7 +247,8 @@ static void sprd_iommu_domain_free(struct iommu_domain *domain)
 }

 static int sprd_iommu_attach_device(struct iommu_domain *domain,
-				    struct device *dev)
+				    struct device *dev,
+				    struct iommu_domain *old)
 {
 	struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev);
 	struct sprd_iommu_domain *dom = to_sprd_domain(domain);
@@ -771,7 +771,8 @@ static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu,
 }

 static int sun50i_iommu_identity_attach(struct iommu_domain *identity_domain,
-					struct device *dev)
+					struct device *dev,
+					struct iommu_domain *old)
 {
 	struct sun50i_iommu *iommu = dev_iommu_priv_get(dev);
 	struct sun50i_iommu_domain *sun50i_domain;

@@ -797,7 +798,8 @@ static struct iommu_domain sun50i_iommu_identity_domain = {
 };

 static int sun50i_iommu_attach_device(struct iommu_domain *domain,
-				      struct device *dev)
+				      struct device *dev,
+				      struct iommu_domain *old)
 {
 	struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
 	struct sun50i_iommu *iommu;

@@ -813,7 +815,7 @@ static int sun50i_iommu_attach_device(struct iommu_domain *domain,
 	if (iommu->domain == domain)
 		return 0;

-	sun50i_iommu_identity_attach(&sun50i_iommu_identity_domain, dev);
+	sun50i_iommu_identity_attach(&sun50i_iommu_identity_domain, dev, old);

 	sun50i_iommu_attach_domain(iommu, sun50i_domain);

@@ -839,6 +841,8 @@ static int sun50i_iommu_of_xlate(struct device *dev,

 	dev_iommu_priv_set(dev, platform_get_drvdata(iommu_pdev));

+	put_device(&iommu_pdev->dev);
+
 	return iommu_fwspec_add_ids(dev, &id, 1);
 }
@@ -490,7 +490,7 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
 }

 static int tegra_smmu_attach_dev(struct iommu_domain *domain,
-				 struct device *dev)
+				 struct device *dev, struct iommu_domain *old)
 {
 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
 	struct tegra_smmu *smmu = dev_iommu_priv_get(dev);

@@ -524,9 +524,9 @@ static int tegra_smmu_attach_dev(struct iommu_domain *domain,
 }

 static int tegra_smmu_identity_attach(struct iommu_domain *identity_domain,
-				      struct device *dev)
+				      struct device *dev,
+				      struct iommu_domain *old)
 {
-	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
 	struct tegra_smmu_as *as;
 	struct tegra_smmu *smmu;

@@ -535,10 +535,10 @@ static int tegra_smmu_identity_attach(struct iommu_domain *identity_domain,
 	if (!fwspec)
 		return -ENODEV;

-	if (domain == identity_domain || !domain)
+	if (old == identity_domain || !old)
 		return 0;

-	as = to_smmu_as(domain);
+	as = to_smmu_as(old);
 	smmu = as->smmu;
 	for (index = 0; index < fwspec->num_ids; index++) {
 		tegra_smmu_disable(smmu, fwspec->ids[index], as->id);

@@ -830,10 +830,9 @@ static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
 		return NULL;

 	mc = platform_get_drvdata(pdev);
-	if (!mc) {
-		put_device(&pdev->dev);
+	put_device(&pdev->dev);
+	if (!mc)
 		return NULL;
-	}

 	return mc->smmu;
 }
@@ -730,7 +730,8 @@ static struct iommu_domain *viommu_domain_alloc_identity(struct device *dev)
 	return domain;
 }

-static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
+static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev,
+			     struct iommu_domain *old)
 {
 	int ret = 0;
 	struct virtio_iommu_req_attach req;

@@ -781,7 +782,8 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 }

 static int viommu_attach_identity_domain(struct iommu_domain *domain,
-					 struct device *dev)
+					 struct device *dev,
+					 struct iommu_domain *old)
 {
 	int ret = 0;
 	struct virtio_iommu_req_attach req;
@ -0,0 +1,283 @@
|
|||
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
|
||||
/*
|
||||
* Copyright (c) 2025 MediaTek Inc.
|
||||
* Author: Zhengnan chen <zhengnan.chen@mediatek.com>
|
||||
*/
|
||||
#ifndef _DT_BINDINGS_MEMORY_MEDIATEK_MT8189_MEMORY_PORT_H_
|
||||
#define _DT_BINDINGS_MEMORY_MEDIATEK_MT8189_MEMORY_PORT_H_
|
||||
|
||||
#include <dt-bindings/memory/mtk-memory-port.h>
|
||||
|
||||
#define SMI_L0_ID (0)
|
||||
#define SMI_L1_ID (1)
|
||||
#define SMI_L2_ID (2)
|
||||
#define SMI_L4_ID (3)
|
||||
#define SMI_L7_ID (4)
|
||||
#define SMI_L9_ID (5)
|
||||
#define SMI_L11_ID (6)
|
||||
#define SMI_L13_ID (7)
|
||||
#define SMI_L14_ID (8)
|
||||
#define SMI_L16_ID (9)
|
||||
#define SMI_L17_ID (10)
|
||||
#define SMI_L19_ID (11)
|
||||
#define SMI_L20_ID (12)
|
||||
|
||||
/*
 * The MM IOMMU supports a 16GB DMA address space. We split it into four
 * ranges: 0~4G, 4G~8G, 8G~12G and 12G~16G, and each master may be placed
 * in any of these regions, BUT:
 * a) Make sure all the ports inside a larb are in one range.
 * b) The iova of any master can NOT cross a 4G/8G/12G boundary.
 *
 * This is the suggested mapping in this SoC:
 *
 * modules			dma-address-region	larbs-ports
 * disp/mdp			0 ~ 4G			larb0/1/2
 * vcodec			4G ~ 8G			larb4/7
 * imgsys/cam/ipesys		8G ~ 12G		the other larbs
 * N/A				12G ~ 16G
 */
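The port IDs below are built with MTK_M4U_ID(), which packs a larb index and a port index into the single cell value shared by the IOMMU driver and its masters. As an illustration only, assuming the usual encoding from mtk-memory-port.h (larb number in the upper bits, port number in the low five bits), a consumer could split an ID back apart as shown here; the exact shift and mask belong to that header, not to this file, and the MY_* names are made up:

/* Illustration only: assumes MTK_M4U_ID(larb, port) == ((larb) << 5) | (port) */
#define MY_M4U_TO_LARB(id)	(((id) >> 5) & 0x1f)
#define MY_M4U_TO_PORT(id)	((id) & 0x1f)

/* e.g. MY_M4U_TO_LARB(M4U_L7_P2_VENC_BSDMA) == SMI_L7_ID and MY_M4U_TO_PORT(...) == 2 */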
|
||||
|
||||
/* Larb0 -- disp */
|
||||
#define M4U_L0_P0_DISP_OVL0_4L_HDR MTK_M4U_ID(SMI_L0_ID, 0)
|
||||
#define M4U_L0_P1_DISP_OVL0_4L_RDMA0 MTK_M4U_ID(SMI_L0_ID, 1)
|
||||
#define M4U_L0_P2_DISP_OVL1_4L_RDMA1 MTK_M4U_ID(SMI_L0_ID, 2)
|
||||
#define M4U_L0_P3_DISP_OVL0_4L_RDMA2 MTK_M4U_ID(SMI_L0_ID, 3)
|
||||
#define M4U_L0_P4_DISP_OVL1_4L_RDMA3 MTK_M4U_ID(SMI_L0_ID, 4)
|
||||
#define M4U_L0_P5_DISP_RDMA0 MTK_M4U_ID(SMI_L0_ID, 5)
|
||||
#define M4U_L0_P6_DISP_WDMA0 MTK_M4U_ID(SMI_L0_ID, 6)
|
||||
#define M4U_L0_P7_DISP_FAKE_ENG0 MTK_M4U_ID(SMI_L0_ID, 7)
|
||||
|
||||
/* Larb1 -- disp */
|
||||
#define M4U_L1_P0_DISP_OVL1_4L_HDR MTK_M4U_ID(SMI_L1_ID, 0)
|
||||
#define M4U_L1_P1_DISP_OVL1_4L_RDMA0 MTK_M4U_ID(SMI_L1_ID, 1)
|
||||
#define M4U_L1_P2_DISP_OVL0_4L_RDMA1 MTK_M4U_ID(SMI_L1_ID, 2)
|
||||
#define M4U_L1_P3_DISP_OVL1_4L_RDMA2 MTK_M4U_ID(SMI_L1_ID, 3)
|
||||
#define M4U_L1_P4_DISP_OVL0_4L_RDMA3 MTK_M4U_ID(SMI_L1_ID, 4)
|
||||
#define M4U_L1_P5_DISP_RDMA1 MTK_M4U_ID(SMI_L1_ID, 5)
|
||||
#define M4U_L1_P6_DISP_WDMA1 MTK_M4U_ID(SMI_L1_ID, 6)
|
||||
#define M4U_L1_P7_DISP_FAKE_ENG1 MTK_M4U_ID(SMI_L1_ID, 7)
|
||||
|
||||
/* Larb2 -- mmlsys(mdp) */
|
||||
#define M4U_L2_P0_MDP_RDMA0 MTK_M4U_ID(SMI_L2_ID, 0)
|
||||
#define M4U_L2_P1_MDP_RDMA1 MTK_M4U_ID(SMI_L2_ID, 1)
|
||||
#define M4U_L2_P2_MDP_WROT0 MTK_M4U_ID(SMI_L2_ID, 2)
|
||||
#define M4U_L2_P3_MDP_WROT1 MTK_M4U_ID(SMI_L2_ID, 3)
|
||||
#define M4U_L2_P4_MDP_DUMMY0 MTK_M4U_ID(SMI_L2_ID, 4)
|
||||
#define M4U_L2_P5_MDP_DUMMY1 MTK_M4U_ID(SMI_L2_ID, 5)
|
||||
#define M4U_L2_P6_MDP_RDMA2 MTK_M4U_ID(SMI_L2_ID, 6)
|
||||
#define M4U_L2_P7_MDP_RDMA3 MTK_M4U_ID(SMI_L2_ID, 7)
|
||||
#define M4U_L2_P8_MDP_WROT2 MTK_M4U_ID(SMI_L2_ID, 8)
|
||||
#define M4U_L2_P9_MDP_WROT3 MTK_M4U_ID(SMI_L2_ID, 9)
|
||||
#define M4U_L2_P10_DISP_FAKE0 MTK_M4U_ID(SMI_L2_ID, 10)
|
||||
|
||||
/* Larb3: null */
|
||||
|
||||
/* Larb4 -- vdec */
|
||||
#define M4U_L4_P0_HW_VDEC_MC_EXT MTK_M4U_ID(SMI_L4_ID, 0)
|
||||
#define M4U_L4_P1_HW_VDEC_UFO_EXT MTK_M4U_ID(SMI_L4_ID, 1)
|
||||
#define M4U_L4_P2_HW_VDEC_PP_EXT MTK_M4U_ID(SMI_L4_ID, 2)
|
||||
#define M4U_L4_P3_HW_VDEC_PRED_RD_EXT MTK_M4U_ID(SMI_L4_ID, 3)
|
||||
#define M4U_L4_P4_HW_VDEC_PRED_WR_EXT MTK_M4U_ID(SMI_L4_ID, 4)
|
||||
#define M4U_L4_P5_HW_VDEC_PPWRAP_EXT MTK_M4U_ID(SMI_L4_ID, 5)
|
||||
#define M4U_L4_P6_HW_VDEC_TILE_EXT MTK_M4U_ID(SMI_L4_ID, 6)
|
||||
#define M4U_L4_P7_HW_VDEC_VLD_EXT MTK_M4U_ID(SMI_L4_ID, 7)
|
||||
#define M4U_L4_P8_HW_VDEC_VLD2_EXT MTK_M4U_ID(SMI_L4_ID, 8)
|
||||
#define M4U_L4_P9_HW_VDEC_AVC_MV_EXT MTK_M4U_ID(SMI_L4_ID, 9)
|
||||
#define M4U_L4_P10_HW_VDEC_RG_CTRL_DMA_EXT MTK_M4U_ID(SMI_L4_ID, 10)
|
||||
#define M4U_L4_P11_HW_VDEC_UFO_ENC_EXT MTK_M4U_ID(SMI_L4_ID, 11)
|
||||
|
||||
/* Larb5: null */
|
||||
|
||||
/* Larb6: null */
|
||||
|
||||
/* Larb7 -- venc */
|
||||
#define M4U_L7_P0_VENC_RCPU MTK_M4U_ID(SMI_L7_ID, 0)
|
||||
#define M4U_L7_P1_VENC_REC MTK_M4U_ID(SMI_L7_ID, 1)
|
||||
#define M4U_L7_P2_VENC_BSDMA MTK_M4U_ID(SMI_L7_ID, 2)
|
||||
#define M4U_L7_P3_VENC_SV_COMV MTK_M4U_ID(SMI_L7_ID, 3)
|
||||
#define M4U_L7_P4_VENC_RD_COMV MTK_M4U_ID(SMI_L7_ID, 4)
|
||||
#define M4U_L7_P5_JPGENC_Y_RDMA MTK_M4U_ID(SMI_L7_ID, 5)
|
||||
#define M4U_L7_P6_JPGENC_C_RDMA MTK_M4U_ID(SMI_L7_ID, 6)
|
||||
#define M4U_L7_P7_JPGENC_Q_RDMA MTK_M4U_ID(SMI_L7_ID, 7)
|
||||
#define M4U_L7_P8_VENC_SUB_W_LUMA MTK_M4U_ID(SMI_L7_ID, 8)
|
||||
#define M4U_L7_P9_JPGENC_BSDMA MTK_M4U_ID(SMI_L7_ID, 9)
|
||||
#define M4U_L7_P10_VENC_CUR_LUMA MTK_M4U_ID(SMI_L7_ID, 10)
|
||||
#define M4U_L7_P11_VENC_CUR_CHROMA MTK_M4U_ID(SMI_L7_ID, 11)
|
||||
#define M4U_L7_P12_VENC_REF_LUMA MTK_M4U_ID(SMI_L7_ID, 12)
|
||||
#define M4U_L7_P13_VENC_REF_CHROMA MTK_M4U_ID(SMI_L7_ID, 13)
|
||||
#define M4U_L7_P14_VENC_SUB_R_LUMA MTK_M4U_ID(SMI_L7_ID, 14)
|
||||
#define M4U_L7_P15_JPGDEC_WDMA MTK_M4U_ID(SMI_L7_ID, 15)
|
||||
#define M4U_L7_P16_JPGDEC_BSDMA MTK_M4U_ID(SMI_L7_ID, 16)
|
||||
#define M4U_L7_P17_JPGDEC_HUFF_OFFSET MTK_M4U_ID(SMI_L7_ID, 17)
|
||||
|
||||
/* Larb8: null */
|
||||
|
||||
/* Larb9 --imgsys */
|
||||
#define M4U_L9_P0_IMGI_D1 MTK_M4U_ID(SMI_L9_ID, 0)
|
||||
#define M4U_L9_P1_IMGBI_D1 MTK_M4U_ID(SMI_L9_ID, 1)
|
||||
#define M4U_L9_P2_DMGI_D1 MTK_M4U_ID(SMI_L9_ID, 2)
|
||||
#define M4U_L9_P3_DEPI_D1 MTK_M4U_ID(SMI_L9_ID, 3)
|
||||
#define M4U_L9_P4_LCE_D1 MTK_M4U_ID(SMI_L9_ID, 4)
|
||||
#define M4U_L9_P5_SMTI_D1 MTK_M4U_ID(SMI_L9_ID, 5)
|
||||
#define M4U_L9_P6_SMTO_D2 MTK_M4U_ID(SMI_L9_ID, 6)
|
||||
#define M4U_L9_P7_SMTO_D1 MTK_M4U_ID(SMI_L9_ID, 7)
|
||||
#define M4U_L9_P8_CRZO_D1 MTK_M4U_ID(SMI_L9_ID, 8)
|
||||
#define M4U_L9_P9_IMG3O_D1 MTK_M4U_ID(SMI_L9_ID, 9)
|
||||
#define M4U_L9_P10_VIPI_D1 MTK_M4U_ID(SMI_L9_ID, 10)
|
||||
#define M4U_L9_P11_SMTI_D5 MTK_M4U_ID(SMI_L9_ID, 11)
|
||||
#define M4U_L9_P12_TIMGO_D1 MTK_M4U_ID(SMI_L9_ID, 12)
|
||||
#define M4U_L9_P13_UFBC_W0 MTK_M4U_ID(SMI_L9_ID, 13)
|
||||
#define M4U_L9_P14_UFBC_R0 MTK_M4U_ID(SMI_L9_ID, 14)
|
||||
#define M4U_L9_P15_WPE_RDMA1 MTK_M4U_ID(SMI_L9_ID, 15)
|
||||
#define M4U_L9_P16_WPE_RDMA0 MTK_M4U_ID(SMI_L9_ID, 16)
|
||||
#define M4U_L9_P17_WPE_WDMA MTK_M4U_ID(SMI_L9_ID, 17)
|
||||
#define M4U_L9_P18_MFB_RDMA0 MTK_M4U_ID(SMI_L9_ID, 18)
|
||||
#define M4U_L9_P19_MFB_RDMA1 MTK_M4U_ID(SMI_L9_ID, 19)
|
||||
#define M4U_L9_P20_MFB_RDMA2 MTK_M4U_ID(SMI_L9_ID, 20)
|
||||
#define M4U_L9_P21_MFB_RDMA3 MTK_M4U_ID(SMI_L9_ID, 21)
|
||||
#define M4U_L9_P22_MFB_RDMA4 MTK_M4U_ID(SMI_L9_ID, 22)
|
||||
#define M4U_L9_P23_MFB_RDMA5 MTK_M4U_ID(SMI_L9_ID, 23)
|
||||
#define M4U_L9_P24_MFB_WDMA0 MTK_M4U_ID(SMI_L9_ID, 24)
|
||||
#define M4U_L9_P25_MFB_WDMA1 MTK_M4U_ID(SMI_L9_ID, 25)
|
||||
#define M4U_L9_P26_RESERVE6 MTK_M4U_ID(SMI_L9_ID, 26)
|
||||
#define M4U_L9_P27_RESERVE7 MTK_M4U_ID(SMI_L9_ID, 27)
|
||||
#define M4U_L9_P28_RESERVE8 MTK_M4U_ID(SMI_L9_ID, 28)
|
||||
|
||||
/* Larb10: null */
|
||||
|
||||
/* Larb11 -- imgsys */
|
||||
#define M4U_L11_P0_IMGI_D1 MTK_M4U_ID(SMI_L11_ID, 0)
|
||||
#define M4U_L11_P1_IMGBI_D1 MTK_M4U_ID(SMI_L11_ID, 1)
|
||||
#define M4U_L11_P2_DMGI_D1 MTK_M4U_ID(SMI_L11_ID, 2)
|
||||
#define M4U_L11_P3_DEPI_D1 MTK_M4U_ID(SMI_L11_ID, 3)
|
||||
#define M4U_L11_P4_LCE_D1 MTK_M4U_ID(SMI_L11_ID, 4)
|
||||
#define M4U_L11_P5_SMTI_D1 MTK_M4U_ID(SMI_L11_ID, 5)
|
||||
#define M4U_L11_P6_SMTO_D2 MTK_M4U_ID(SMI_L11_ID, 6)
|
||||
#define M4U_L11_P7_SMTO_D1 MTK_M4U_ID(SMI_L11_ID, 7)
|
||||
#define M4U_L11_P8_CRZO_D1 MTK_M4U_ID(SMI_L11_ID, 8)
|
||||
#define M4U_L11_P9_IMG3O_D1 MTK_M4U_ID(SMI_L11_ID, 9)
|
||||
#define M4U_L11_P10_VIPI_D1 MTK_M4U_ID(SMI_L11_ID, 10)
|
||||
#define M4U_L11_P11_SMTI_D5 MTK_M4U_ID(SMI_L11_ID, 11)
|
||||
#define M4U_L11_P12_TIMGO_D1 MTK_M4U_ID(SMI_L11_ID, 12)
|
||||
#define M4U_L11_P13_UFBC_W0 MTK_M4U_ID(SMI_L11_ID, 13)
|
||||
#define M4U_L11_P14_UFBC_R0 MTK_M4U_ID(SMI_L11_ID, 14)
|
||||
#define M4U_L11_P15_WPE_RDMA1 MTK_M4U_ID(SMI_L11_ID, 15)
|
||||
#define M4U_L11_P16_WPE_RDMA0 MTK_M4U_ID(SMI_L11_ID, 16)
|
||||
#define M4U_L11_P17_WPE_WDMA MTK_M4U_ID(SMI_L11_ID, 17)
|
||||
#define M4U_L11_P18_MFB_RDMA0 MTK_M4U_ID(SMI_L11_ID, 18)
|
||||
#define M4U_L11_P19_MFB_RDMA1 MTK_M4U_ID(SMI_L11_ID, 19)
|
||||
#define M4U_L11_P20_MFB_RDMA2 MTK_M4U_ID(SMI_L11_ID, 20)
|
||||
#define M4U_L11_P21_MFB_RDMA3 MTK_M4U_ID(SMI_L11_ID, 21)
|
||||
#define M4U_L11_P22_MFB_RDMA4 MTK_M4U_ID(SMI_L11_ID, 22)
|
||||
#define M4U_L11_P23_MFB_RDMA5 MTK_M4U_ID(SMI_L11_ID, 23)
|
||||
#define M4U_L11_P24_MFB_WDMA0 MTK_M4U_ID(SMI_L11_ID, 24)
|
||||
#define M4U_L11_P25_MFB_WDMA1 MTK_M4U_ID(SMI_L11_ID, 25)
|
||||
#define M4U_L11_P26_RESERVE6 MTK_M4U_ID(SMI_L11_ID, 26)
|
||||
#define M4U_L11_P27_RESERVE7 MTK_M4U_ID(SMI_L11_ID, 27)
|
||||
#define M4U_L11_P28_RESERVE8 MTK_M4U_ID(SMI_L11_ID, 28)
|
||||
|
||||
/* Larb12: null */
|
||||
|
||||
/* Larb13 -- cam */
|
||||
#define M4U_L13_P0_MRAWI MTK_M4U_ID(SMI_L13_ID, 0)
|
||||
#define M4U_L13_P1_MRAWO_0 MTK_M4U_ID(SMI_L13_ID, 1)
|
||||
#define M4U_L13_P2_MRAWO_1 MTK_M4U_ID(SMI_L13_ID, 2)
|
||||
#define M4U_L13_P3_CAMSV_1 MTK_M4U_ID(SMI_L13_ID, 3)
|
||||
#define M4U_L13_P4_CAMSV_2 MTK_M4U_ID(SMI_L13_ID, 4)
|
||||
#define M4U_L13_P5_CAMSV_3 MTK_M4U_ID(SMI_L13_ID, 5)
|
||||
#define M4U_L13_P6_CAMSV_4 MTK_M4U_ID(SMI_L13_ID, 6)
|
||||
#define M4U_L13_P7_CAMSV_5 MTK_M4U_ID(SMI_L13_ID, 7)
|
||||
#define M4U_L13_P8_CAMSV_6 MTK_M4U_ID(SMI_L13_ID, 8)
|
||||
#define M4U_L13_P9_CCUI MTK_M4U_ID(SMI_L13_ID, 9)
|
||||
#define M4U_L13_P10_CCUO MTK_M4U_ID(SMI_L13_ID, 10)
|
||||
#define M4U_L13_P11_FAKE MTK_M4U_ID(SMI_L13_ID, 11)
|
||||
#define M4U_L13_P12_PDAI_0 MTK_M4U_ID(SMI_L13_ID, 12)
|
||||
#define M4U_L13_P13_PDAI_1 MTK_M4U_ID(SMI_L13_ID, 13)
|
||||
#define M4U_L13_P14_PDAO MTK_M4U_ID(SMI_L13_ID, 14)
|
||||
|
||||
/* Larb14 -- cam */
|
||||
#define M4U_L14_P0_RESERVE MTK_M4U_ID(SMI_L14_ID, 0)
|
||||
#define M4U_L14_P1_RESERVE MTK_M4U_ID(SMI_L14_ID, 1)
|
||||
#define M4U_L14_P2_RESERVE MTK_M4U_ID(SMI_L14_ID, 2)
|
||||
#define M4U_L14_P3_CAMSV_0 MTK_M4U_ID(SMI_L14_ID, 3)
|
||||
#define M4U_L14_P4_CCUI MTK_M4U_ID(SMI_L14_ID, 4)
|
||||
#define M4U_L14_P5_CCUO MTK_M4U_ID(SMI_L14_ID, 5)
|
||||
#define M4U_L14_P6_CAMSV_7 MTK_M4U_ID(SMI_L14_ID, 6)
|
||||
#define M4U_L14_P7_CAMSV_8 MTK_M4U_ID(SMI_L14_ID, 7)
|
||||
#define M4U_L14_P8_CAMSV_9 MTK_M4U_ID(SMI_L14_ID, 8)
|
||||
#define M4U_L14_P9_CAMSV_10 MTK_M4U_ID(SMI_L14_ID, 9)
|
||||
|
||||
/* Larb15: null */
|
||||
|
||||
/* Larb16 -- cam */
|
||||
#define M4U_L16_P0_IMGO_R1_A MTK_M4U_ID(SMI_L16_ID, 0)
|
||||
#define M4U_L16_P1_RRZO_R1_A MTK_M4U_ID(SMI_L16_ID, 1)
|
||||
#define M4U_L16_P2_CQI_R1_A MTK_M4U_ID(SMI_L16_ID, 2)
|
||||
#define M4U_L16_P3_BPCI_R1_A MTK_M4U_ID(SMI_L16_ID, 3)
|
||||
#define M4U_L16_P4_YUVO_R1_A MTK_M4U_ID(SMI_L16_ID, 4)
|
||||
#define M4U_L16_P5_UFDI_R2_A MTK_M4U_ID(SMI_L16_ID, 5)
|
||||
#define M4U_L16_P6_RAWI_R2_A MTK_M4U_ID(SMI_L16_ID, 6)
|
||||
#define M4U_L16_P7_RAWI_R3_A MTK_M4U_ID(SMI_L16_ID, 7)
|
||||
#define M4U_L16_P8_AAO_R1_A MTK_M4U_ID(SMI_L16_ID, 8)
|
||||
#define M4U_L16_P9_AFO_R1_A MTK_M4U_ID(SMI_L16_ID, 9)
|
||||
#define M4U_L16_P10_FLKO_R1_A MTK_M4U_ID(SMI_L16_ID, 10)
|
||||
#define M4U_L16_P11_LCESO_R1_A MTK_M4U_ID(SMI_L16_ID, 11)
|
||||
#define M4U_L16_P12_CRZO_R1_A MTK_M4U_ID(SMI_L16_ID, 12)
|
||||
#define M4U_L16_P13_LTMSO_R1_A MTK_M4U_ID(SMI_L16_ID, 13)
|
||||
#define M4U_L16_P14_RSSO_R1_A MTK_M4U_ID(SMI_L16_ID, 14)
|
||||
#define M4U_L16_P15_AAHO_R1_A MTK_M4U_ID(SMI_L16_ID, 15)
|
||||
#define M4U_L16_P16_LSCI_R1_A MTK_M4U_ID(SMI_L16_ID, 16)
|
||||
|
||||
/* Larb17 -- cam */
|
||||
#define M4U_L17_P0_IMGO_R1_B MTK_M4U_ID(SMI_L17_ID, 0)
|
||||
#define M4U_L17_P1_RRZO_R1_B MTK_M4U_ID(SMI_L17_ID, 1)
|
||||
#define M4U_L17_P2_CQI_R1_B MTK_M4U_ID(SMI_L17_ID, 2)
|
||||
#define M4U_L17_P3_BPCI_R1_B MTK_M4U_ID(SMI_L17_ID, 3)
|
||||
#define M4U_L17_P4_YUVO_R1_B MTK_M4U_ID(SMI_L17_ID, 4)
|
||||
#define M4U_L17_P5_UFDI_R2_B MTK_M4U_ID(SMI_L17_ID, 5)
|
||||
#define M4U_L17_P6_RAWI_R2_B MTK_M4U_ID(SMI_L17_ID, 6)
|
||||
#define M4U_L17_P7_RAWI_R3_B MTK_M4U_ID(SMI_L17_ID, 7)
|
||||
#define M4U_L17_P8_AAO_R1_B MTK_M4U_ID(SMI_L17_ID, 8)
|
||||
#define M4U_L17_P9_AFO_R1_B MTK_M4U_ID(SMI_L17_ID, 9)
|
||||
#define M4U_L17_P10_FLKO_R1_B MTK_M4U_ID(SMI_L17_ID, 10)
|
||||
#define M4U_L17_P11_LCESO_R1_B MTK_M4U_ID(SMI_L17_ID, 11)
|
||||
#define M4U_L17_P12_CRZO_R1_B MTK_M4U_ID(SMI_L17_ID, 12)
|
||||
#define M4U_L17_P13_LTMSO_R1_B MTK_M4U_ID(SMI_L17_ID, 13)
|
||||
#define M4U_L17_P14_RSSO_R1_B MTK_M4U_ID(SMI_L17_ID, 14)
|
||||
#define M4U_L17_P15_AAHO_R1_B MTK_M4U_ID(SMI_L17_ID, 15)
|
||||
#define M4U_L17_P16_LSCI_R1_B MTK_M4U_ID(SMI_L17_ID, 16)
|
||||
|
||||
/* Larb19 -- ipesys */
|
||||
#define M4U_L19_P0_DVS_RDMA MTK_M4U_ID(SMI_L19_ID, 0)
|
||||
#define M4U_L19_P1_DVS_WDMA MTK_M4U_ID(SMI_L19_ID, 1)
|
||||
#define M4U_L19_P2_DVP_RDMA MTK_M4U_ID(SMI_L19_ID, 2)
|
||||
#define M4U_L19_P3_DVP_WDMA MTK_M4U_ID(SMI_L19_ID, 3)
|
||||
|
||||
/* Larb20 -- ipesys */
|
||||
#define M4U_L20_P0_FDVT_RDA_0 MTK_M4U_ID(SMI_L20_ID, 0)
|
||||
#define M4U_L20_P1_FDVT_RDB_0 MTK_M4U_ID(SMI_L20_ID, 1)
|
||||
#define M4U_L20_P2_FDVT_WRA_0 MTK_M4U_ID(SMI_L20_ID, 2)
|
||||
#define M4U_L20_P3_FDVT_WRB_0 MTK_M4U_ID(SMI_L20_ID, 3)
|
||||
#define M4U_L20_P4_RSC_RDMA MTK_M4U_ID(SMI_L20_ID, 4)
|
||||
#define M4U_L20_P5_RSC_WDMA MTK_M4U_ID(SMI_L20_ID, 5)
|
||||
|
||||
/* fake larb21 for gce */
|
||||
#define M4U_L21_GCE_DM MTK_M4U_ID(21, 0)
|
||||
#define M4U_L21_GCE_MM MTK_M4U_ID(21, 1)
|
||||
|
||||
/* fake larb & port for svp and dual svp and wfd */
|
||||
#define M4U_PORT_SVP_HEAP MTK_M4U_ID(22, 0)
|
||||
#define M4U_PORT_DUAL_SVP_HEAP MTK_M4U_ID(22, 1)
|
||||
#define M4U_PORT_WFD_HEAP MTK_M4U_ID(22, 2)
|
||||
|
||||
/* fake larb0 for apu */
|
||||
#define M4U_L0_APU_DATA MTK_M4U_ID(0, 0)
|
||||
#define M4U_L0_APU_CODE MTK_M4U_ID(0, 1)
|
||||
#define M4U_L0_APU_SECURE MTK_M4U_ID(0, 2)
|
||||
#define M4U_L0_APU_VLM MTK_M4U_ID(0, 3)
|
||||
|
||||
/* infra/peri */
|
||||
#define IFR_IOMMU_PORT_PCIE_0 MTK_IFAIOMMU_PERI_ID(0, 26)
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,191 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#ifndef __GENERIC_PT_COMMON_H
|
||||
#define __GENERIC_PT_COMMON_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/build_bug.h>
|
||||
#include <linux/bits.h>
|
||||
|
||||
/**
 * DOC: Generic Radix Page Table
 *
 * Generic Radix Page Table is a set of functions and helpers to efficiently
 * parse radix style page tables typically seen in HW implementations. The
 * interface is built to deliver similar code generation as the mm's pte/pmd/etc
 * system by fully inlining the exact code required to handle each table level.
 *
 * Like the mm subsystem each format contributes its parsing implementation
 * under common names and the common code implements the required algorithms.
 *
 * The system is divided into three logical levels:
 *
 * - The page table format and its manipulation functions
 * - Generic helpers to give a consistent API regardless of underlying format
 * - An algorithm implementation (e.g. IOMMU/DRM/KVM/MM)
 *
 * Multiple implementations are supported. The intention is to have the generic
 * format code be re-usable for whatever specialized implementation is required.
 * The generic code is solely about the format of the radix tree; it does not
 * include memory allocation or higher level decisions that are left for the
 * implementation.
 *
 * The generic framework supports a superset of functions across many HW
 * implementations:
 *
 * - Entries comprised of contiguous blocks of IO PTEs for larger page sizes
 * - Multi-level tables, up to 6 levels. Runtime selected top level
 * - Runtime variable table level size (ARM's concatenated tables)
 * - Expandable top level allowing dynamic sizing of table levels
 * - Optional leaf entries at any level
 * - 32-bit/64-bit virtual and output addresses, using every address bit
 * - Dirty tracking
 * - Sign extended addressing
 */
|
||||
|
||||
/**
 * struct pt_common - struct for all page table implementations
 */
struct pt_common {
	/**
	 * @top_of_table: Encodes the table top pointer and the top level in a
	 * single value. Must use READ_ONCE/WRITE_ONCE to access it. The lower
	 * bits of the aligned table pointer are used for the level.
	 */
	uintptr_t top_of_table;
	/**
	 * @max_oasz_lg2: Maximum number of bits the OA can contain. Upper bits
	 * must be zero. This may be less than what the page table format
	 * supports, but must not be more.
	 */
	u8 max_oasz_lg2;
	/**
	 * @max_vasz_lg2: Maximum number of bits the VA can contain. Upper bits
	 * are 0 or 1 depending on pt_full_va_prefix(). This may be less than
	 * what the page table format supports, but must not be more. When
	 * PT_FEAT_DYNAMIC_TOP is set this reflects the maximum VA capability.
	 */
	u8 max_vasz_lg2;
	/**
	 * @features: Bitmap of `enum pt_features`
	 */
	unsigned int features;
};

/* Encoding parameters for top_of_table */
enum {
	PT_TOP_LEVEL_BITS = 3,
	PT_TOP_LEVEL_MASK = GENMASK(PT_TOP_LEVEL_BITS - 1, 0),
};
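A short illustration of how a format implementation might read this encoding back. The helper name is made up; the fields and masks are the ones defined above:

/* Illustrative decode of top_of_table (my_read_top is a hypothetical name) */
static inline void my_read_top(const struct pt_common *common,
			       void **table, unsigned int *level)
{
	/* single READ_ONCE so the pointer and level come from the same value */
	uintptr_t top = READ_ONCE(common->top_of_table);

	*level = top & PT_TOP_LEVEL_MASK;
	*table = (void *)(top & ~(uintptr_t)PT_TOP_LEVEL_MASK);
}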
|
||||
|
||||
/**
|
||||
* enum pt_features - Features turned on in the table. Each symbol is a bit
|
||||
* position.
|
||||
*/
|
||||
enum pt_features {
|
||||
	/**
	 * @PT_FEAT_DMA_INCOHERENT: Cache flush page table memory before
	 * assuming the HW can read it. Otherwise an SMP release is sufficient
	 * for HW to read it.
	 */
	PT_FEAT_DMA_INCOHERENT,
|
||||
/**
|
||||
* @PT_FEAT_FULL_VA: The table can span the full VA range from 0 to
|
||||
* PT_VADDR_MAX.
|
||||
*/
|
||||
PT_FEAT_FULL_VA,
|
||||
/**
|
||||
* @PT_FEAT_DYNAMIC_TOP: The table's top level can be increased
|
||||
* dynamically during map. This requires HW support for atomically
|
||||
* setting both the table top pointer and the starting table level.
|
||||
*/
|
||||
PT_FEAT_DYNAMIC_TOP,
|
||||
/**
|
||||
* @PT_FEAT_SIGN_EXTEND: The top most bit of the valid VA range sign
|
||||
* extends up to the full pt_vaddr_t. This divides the page table into
|
||||
* three VA ranges::
|
||||
*
|
||||
* 0 -> 2^N - 1 Lower
|
||||
* 2^N -> (MAX - 2^N - 1) Non-Canonical
|
||||
* MAX - 2^N -> MAX Upper
|
||||
*
|
||||
* In this mode pt_common::max_vasz_lg2 includes the sign bit and the
|
||||
* upper bits that don't fall within the translation are just validated.
|
||||
*
|
||||
* If not set there is no sign extension and valid VA goes from 0 to 2^N
|
||||
* - 1.
|
||||
*/
|
||||
PT_FEAT_SIGN_EXTEND,
|
||||
/**
|
||||
* @PT_FEAT_FLUSH_RANGE: IOTLB maintenance is done by flushing IOVA
|
||||
* ranges which will clean out any walk cache or any IOPTE fully
|
||||
* contained by the range. The optimization objective is to minimize the
|
||||
* number of flushes even if ranges include IOVA gaps that do not need
|
||||
* to be flushed.
|
||||
*/
|
||||
PT_FEAT_FLUSH_RANGE,
|
||||
	/**
	 * @PT_FEAT_FLUSH_RANGE_NO_GAPS: Like PT_FEAT_FLUSH_RANGE except that
	 * the optimization objective is to flush only the IOVA that has been
	 * changed. This mode is suitable for cases like hypervisor shadowing,
	 * where flushing unchanged ranges may cause the hypervisor to re-parse
	 * a significant amount of the page table.
	 */
	PT_FEAT_FLUSH_RANGE_NO_GAPS,
|
||||
/* private: */
|
||||
PT_FEAT_FMT_START,
|
||||
};
|
||||
|
||||
struct pt_amdv1 {
|
||||
struct pt_common common;
|
||||
};
|
||||
|
||||
enum {
|
||||
/*
|
||||
* The memory backing the tables is encrypted. Use __sme_set() to adjust
|
||||
* the page table pointers in the tree. This only works with
|
||||
* CONFIG_AMD_MEM_ENCRYPT.
|
||||
*/
|
||||
PT_FEAT_AMDV1_ENCRYPT_TABLES = PT_FEAT_FMT_START,
|
||||
/*
|
||||
* The PTEs are set to prevent cache incoherent traffic, such as PCI no
|
||||
* snoop. This is set either at creation time or before the first map
|
||||
* operation.
|
||||
*/
|
||||
PT_FEAT_AMDV1_FORCE_COHERENCE,
|
||||
};
|
||||
|
||||
struct pt_vtdss {
|
||||
struct pt_common common;
|
||||
};
|
||||
|
||||
enum {
|
||||
/*
|
||||
* The PTEs are set to prevent cache incoherent traffic, such as PCI no
|
||||
* snoop. This is set either at creation time or before the first map
|
||||
* operation.
|
||||
*/
|
||||
PT_FEAT_VTDSS_FORCE_COHERENCE = PT_FEAT_FMT_START,
|
||||
/*
|
||||
* Prevent creating read-only PTEs. Used to work around HW errata
|
||||
* ERRATA_772415_SPR17.
|
||||
*/
|
||||
PT_FEAT_VTDSS_FORCE_WRITEABLE,
|
||||
};
|
||||
|
||||
struct pt_x86_64 {
|
||||
struct pt_common common;
|
||||
};
|
||||
|
||||
enum {
|
||||
/*
|
||||
* The memory backing the tables is encrypted. Use __sme_set() to adjust
|
||||
* the page table pointers in the tree. This only works with
|
||||
* CONFIG_AMD_MEM_ENCRYPT.
|
||||
*/
|
||||
PT_FEAT_X86_64_AMD_ENCRYPT_TABLES = PT_FEAT_FMT_START,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,293 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#ifndef __GENERIC_PT_IOMMU_H
|
||||
#define __GENERIC_PT_IOMMU_H
|
||||
|
||||
#include <linux/generic_pt/common.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/mm_types.h>
|
||||
|
||||
struct iommu_iotlb_gather;
|
||||
struct pt_iommu_ops;
|
||||
struct pt_iommu_driver_ops;
|
||||
struct iommu_dirty_bitmap;
|
||||
|
||||
/**
 * DOC: IOMMU Radix Page Table
 *
 * The IOMMU implementation of the Generic Page Table provides an ops struct
 * that is useful to go with an iommu_domain to serve the DMA API, IOMMUFD and
 * the generic map/unmap interface.
 *
 * This interface uses a caller provided locking approach. The caller must have
 * a VA range lock concept that prevents concurrent threads from calling ops on
 * the same VA. Generally the range lock must be at least as large as a single
 * map call.
 */
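A minimal sketch of a caller honoring that contract, assuming a driver-side mutex standing in for the per-VA-range lock. my_domain and range_lock are made up; the map helper is the one generated for the AMDv1 format declared later in this header:

	/* inside a hypothetical caller, serializing this VA range itself */
	size_t mapped = 0;
	int ret;

	mutex_lock(&my_domain->range_lock);
	ret = pt_iommu_amdv1_map_pages(&my_domain->domain, iova, paddr,
				       SZ_4K, 1, IOMMU_READ | IOMMU_WRITE,
				       GFP_KERNEL, &mapped);
	mutex_unlock(&my_domain->range_lock);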
|
||||
|
||||
/**
|
||||
* struct pt_iommu - Base structure for IOMMU page tables
|
||||
*
|
||||
* The format-specific struct will include this as the first member.
|
||||
*/
|
||||
struct pt_iommu {
|
||||
/**
|
||||
* @domain: The core IOMMU domain. The driver should use a union to
|
||||
* overlay this memory with its previously existing domain struct to
|
||||
* create an alias.
|
||||
*/
|
||||
struct iommu_domain domain;
|
||||
|
||||
/**
|
||||
* @ops: Function pointers to access the API
|
||||
*/
|
||||
const struct pt_iommu_ops *ops;
|
||||
|
||||
/**
|
||||
* @driver_ops: Function pointers provided by the HW driver to help
|
||||
* manage HW details like caches.
|
||||
*/
|
||||
const struct pt_iommu_driver_ops *driver_ops;
|
||||
|
||||
/**
|
||||
* @nid: Node ID to use for table memory allocations. The IOMMU driver
|
||||
* may want to set the NID to the device's NID, if there are multiple
|
||||
* table walkers.
|
||||
*/
|
||||
int nid;
|
||||
|
||||
/**
|
||||
* @iommu_device: Device pointer used for any DMA cache flushing when
|
||||
* PT_FEAT_DMA_INCOHERENT. This is the iommu device that created the
|
||||
* page table which must have dma ops that perform cache flushing.
|
||||
*/
|
||||
struct device *iommu_device;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct pt_iommu_info - Details about the IOMMU page table
|
||||
*
|
||||
* Returned from pt_iommu_ops->get_info()
|
||||
*/
|
||||
struct pt_iommu_info {
|
||||
/**
|
||||
* @pgsize_bitmap: A bitmask where each set bit indicates
|
||||
* a page size that can be natively stored in the page table.
|
||||
*/
|
||||
u64 pgsize_bitmap;
|
||||
};
|
||||
|
||||
struct pt_iommu_ops {
|
||||
/**
|
||||
* @set_dirty: Make the iova write dirty
|
||||
* @iommu_table: Table to manipulate
|
||||
* @iova: IO virtual address to start
|
||||
*
|
||||
* This is only used by iommufd testing. It makes the iova dirty so that
|
||||
* read_and_clear_dirty() will see it as dirty. Unlike all the other ops
|
||||
* this one is safe to call without holding any locking. It may return
|
||||
* -EAGAIN if there is a race.
|
||||
*/
|
||||
int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova);
|
||||
|
||||
/**
|
||||
* @get_info: Return the pt_iommu_info structure
|
||||
* @iommu_table: Table to query
|
||||
*
|
||||
* Return some basic static information about the page table.
|
||||
*/
|
||||
void (*get_info)(struct pt_iommu *iommu_table,
|
||||
struct pt_iommu_info *info);
|
||||
|
||||
/**
|
||||
* @deinit: Undo a format specific init operation
|
||||
* @iommu_table: Table to destroy
|
||||
*
|
||||
* Release all of the memory. The caller must have already removed the
|
||||
* table from all HW access and all caches.
|
||||
*/
|
||||
void (*deinit)(struct pt_iommu *iommu_table);
|
||||
};
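As a usage sketch, an IOMMU driver that has initialized a table could pull the supported page sizes out of get_info() and advertise them to the core. This is illustrative only and assumes the driver fills the iommu_domain's pgsize_bitmap itself:

	/* inside the driver's domain allocation path (sketch) */
	struct pt_iommu_info info;

	iommu_table->ops->get_info(iommu_table, &info);
	iommu_table->domain.pgsize_bitmap = info.pgsize_bitmap;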
|
||||
|
||||
/**
 * struct pt_iommu_driver_ops - HW IOTLB cache flushing operations
 *
 * The IOMMU driver should implement these using container_of(iommu_table) to
 * get to its iommu_domain derived structure. All ops can be called in atomic
 * contexts as they are buried under DMA API calls.
 */
|
||||
struct pt_iommu_driver_ops {
|
||||
/**
|
||||
* @change_top: Update the top of table pointer
|
||||
* @iommu_table: Table to operate on
|
||||
* @top_paddr: New CPU physical address of the top pointer
|
||||
* @top_level: IOMMU PT level of the new top
|
||||
*
|
||||
* Called under the get_top_lock() spinlock. The driver must update all
|
||||
* HW references to this domain with a new top address and
|
||||
* configuration. On return mappings placed in the new top must be
|
||||
* reachable by the HW.
|
||||
*
|
||||
* top_level encodes the level in IOMMU PT format, level 0 is the
|
||||
* smallest page size increasing from there. This has to be translated
|
||||
* to any HW specific format. During this call the new top will not be
|
||||
* visible to any other API.
|
||||
*
|
||||
* This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
|
||||
* enabled.
|
||||
*/
|
||||
void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
|
||||
unsigned int top_level);
|
||||
|
||||
/**
|
||||
* @get_top_lock: lock to hold when changing the table top
|
||||
* @iommu_table: Table to operate on
|
||||
*
|
||||
* Return a lock to hold when changing the table top page table from
|
||||
* being stored in HW. The lock will be held prior to calling
|
||||
* change_top() and released once the top is fully visible.
|
||||
*
|
||||
* Typically this would be a lock that protects the iommu_domain's
|
||||
* attachment list.
|
||||
*
|
||||
* This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
|
||||
* enabled.
|
||||
*/
|
||||
spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table);
|
||||
};
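A hedged sketch of the driver side, assuming a my_domain structure that embeds the table and protects its attachment list with a spinlock. All my_* names, including my_hw_update_root(), are hypothetical:

static void my_change_top(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
			  unsigned int top_level)
{
	struct my_domain *dom = container_of(iommu_table, struct my_domain, pt.iommu);

	/* re-program every attached device context with the new root and level */
	my_hw_update_root(dom, top_paddr, top_level);
}

static spinlock_t *my_get_top_lock(struct pt_iommu *iommu_table)
{
	struct my_domain *dom = container_of(iommu_table, struct my_domain, pt.iommu);

	return &dom->attach_lock;	/* the same lock that guards the attachment list */
}

static const struct pt_iommu_driver_ops my_driver_ops = {
	.change_top	= my_change_top,
	.get_top_lock	= my_get_top_lock,
};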
|
||||
|
||||
static inline void pt_iommu_deinit(struct pt_iommu *iommu_table)
|
||||
{
|
||||
/*
|
||||
* It is safe to call pt_iommu_deinit() before an init, or if init
|
||||
* fails. The ops pointer will only become non-NULL if deinit needs to be
|
||||
* run.
|
||||
*/
|
||||
if (iommu_table->ops)
|
||||
iommu_table->ops->deinit(iommu_table);
|
||||
}
|
||||
|
||||
/**
|
||||
* struct pt_iommu_cfg - Common configuration values for all formats
|
||||
*/
|
||||
struct pt_iommu_cfg {
|
||||
/**
|
||||
* @features: Features required. Only these features will be turned on.
|
||||
* The feature list should reflect what the IOMMU HW is capable of.
|
||||
*/
|
||||
unsigned int features;
|
||||
/**
|
||||
* @hw_max_vasz_lg2: Maximum VA the IOMMU HW can support. This will
|
||||
* imply the top level of the table.
|
||||
*/
|
||||
u8 hw_max_vasz_lg2;
|
||||
/**
|
||||
* @hw_max_oasz_lg2: Maximum OA the IOMMU HW can support. The format
|
||||
* might select a lower maximum OA.
|
||||
*/
|
||||
u8 hw_max_oasz_lg2;
|
||||
};
|
||||
|
||||
/* Generate the exported function signatures from iommu_pt.h */
|
||||
#define IOMMU_PROTOTYPES(fmt) \
|
||||
phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
|
||||
dma_addr_t iova); \
|
||||
int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain, \
|
||||
unsigned long iova, phys_addr_t paddr, \
|
||||
size_t pgsize, size_t pgcount, \
|
||||
int prot, gfp_t gfp, size_t *mapped); \
|
||||
size_t pt_iommu_##fmt##_unmap_pages( \
|
||||
struct iommu_domain *domain, unsigned long iova, \
|
||||
size_t pgsize, size_t pgcount, \
|
||||
struct iommu_iotlb_gather *iotlb_gather); \
|
||||
int pt_iommu_##fmt##_read_and_clear_dirty( \
|
||||
struct iommu_domain *domain, unsigned long iova, size_t size, \
|
||||
unsigned long flags, struct iommu_dirty_bitmap *dirty); \
|
||||
int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table, \
|
||||
const struct pt_iommu_##fmt##_cfg *cfg, \
|
||||
gfp_t gfp); \
|
||||
void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table, \
|
||||
struct pt_iommu_##fmt##_hw_info *info)
|
||||
#define IOMMU_FORMAT(fmt, member) \
|
||||
struct pt_iommu_##fmt { \
|
||||
struct pt_iommu iommu; \
|
||||
struct pt_##fmt member; \
|
||||
}; \
|
||||
IOMMU_PROTOTYPES(fmt)
|
||||
|
||||
/*
 * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the
 * iommu_pt
 */
#define IOMMU_PT_DOMAIN_OPS(fmt)                                 \
	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys,          \
	.map_pages = &pt_iommu_##fmt##_map_pages,                \
	.unmap_pages = &pt_iommu_##fmt##_unmap_pages
#define IOMMU_PT_DIRTY_OPS(fmt) \
	.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty

/*
 * The driver should set up its domain struct like
 *	union {
 *		struct iommu_domain domain;
 *		struct pt_iommu_xxx xx;
 *	};
 *	PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain);
 *
 * Which creates an alias between driver_domain.domain and
 * driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing
 * driver_domain.domain users.
 */
#define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb)  \
	static_assert(offsetof(s, pt_iommu_memb.domain) ==    \
		      offsetof(s, domain_memb))
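Putting the two macros together, a hypothetical driver using the AMDv1 format might look like the sketch below. my_iommu_domain and attach_lock are made up, and attach_dev, free and the rest of the domain ops remain driver specific:

struct my_iommu_domain {
	union {
		struct iommu_domain domain;
		struct pt_iommu_amdv1 pt;
	};
	spinlock_t attach_lock;
};
PT_IOMMU_CHECK_DOMAIN(struct my_iommu_domain, pt.iommu, domain);

static const struct iommu_domain_ops my_domain_ops = {
	IOMMU_PT_DOMAIN_OPS(amdv1),
	/* .attach_dev, .free, ... stay driver specific */
};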
|
||||
|
||||
struct pt_iommu_amdv1_cfg {
|
||||
struct pt_iommu_cfg common;
|
||||
unsigned int starting_level;
|
||||
};
|
||||
|
||||
struct pt_iommu_amdv1_hw_info {
|
||||
u64 host_pt_root;
|
||||
u8 mode;
|
||||
};
|
||||
|
||||
IOMMU_FORMAT(amdv1, amdpt);
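For illustration, initializing such an AMDv1 table could look roughly like this. The feature set, the 48/52-bit limits and starting_level are arbitrary example values, and my_domain, smmu_dev and my_driver_ops are placeholders from the sketches above:

static int my_domain_init(struct my_iommu_domain *my_domain, struct device *smmu_dev)
{
	struct pt_iommu_amdv1_cfg cfg = {
		.common = {
			.features	 = BIT(PT_FEAT_DYNAMIC_TOP),
			.hw_max_vasz_lg2 = 48,	/* example: what the HW can walk */
			.hw_max_oasz_lg2 = 52,
		},
		.starting_level = 2,
	};
	int ret;

	my_domain->pt.iommu.nid = dev_to_node(smmu_dev);
	my_domain->pt.iommu.iommu_device = smmu_dev;
	my_domain->pt.iommu.driver_ops = &my_driver_ops;

	ret = pt_iommu_amdv1_init(&my_domain->pt, &cfg, GFP_KERNEL);
	if (ret)
		return ret;

	/* ... and on teardown: pt_iommu_deinit(&my_domain->pt.iommu); */
	return 0;
}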
|
||||
|
||||
/* amdv1_mock is used by the iommufd selftest */
|
||||
#define pt_iommu_amdv1_mock pt_iommu_amdv1
|
||||
#define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg
|
||||
struct pt_iommu_amdv1_mock_hw_info;
|
||||
IOMMU_PROTOTYPES(amdv1_mock);
|
||||
|
||||
struct pt_iommu_vtdss_cfg {
|
||||
struct pt_iommu_cfg common;
|
||||
/* 4 is a 57 bit 5 level table */
|
||||
unsigned int top_level;
|
||||
};
|
||||
|
||||
struct pt_iommu_vtdss_hw_info {
|
||||
u64 ssptptr;
|
||||
u8 aw;
|
||||
};
|
||||
|
||||
IOMMU_FORMAT(vtdss, vtdss_pt);
|
||||
|
||||
struct pt_iommu_x86_64_cfg {
|
||||
struct pt_iommu_cfg common;
|
||||
/* 4 is a 57 bit 5 level table */
|
||||
unsigned int top_level;
|
||||
};
|
||||
|
||||
struct pt_iommu_x86_64_hw_info {
|
||||
u64 gcr3_pt;
|
||||
u8 levels;
|
||||
};
|
||||
|
||||
IOMMU_FORMAT(x86_64, x86_64_pt);
|
||||
|
||||
#undef IOMMU_PROTOTYPES
|
||||
#undef IOMMU_FORMAT
|
||||
#endif
|
||||
|
|
@@ -15,8 +15,6 @@ enum io_pgtable_fmt {
 	ARM_64_LPAE_S2,
 	ARM_V7S,
 	ARM_MALI_LPAE,
-	AMD_IOMMU_V1,
-	AMD_IOMMU_V2,
 	APPLE_DART,
 	APPLE_DART2,
 	IO_PGTABLE_NUM_FMTS,
@@ -751,7 +751,8 @@ struct iommu_ops {
  * @free: Release the domain after use.
  */
 struct iommu_domain_ops {
-	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
+	int (*attach_dev)(struct iommu_domain *domain, struct device *dev,
+			  struct iommu_domain *old);
 	int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
 			     ioasid_t pasid, struct iommu_domain *old);
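The extra argument hands attach_dev the domain the device is currently attached to, which several of the driver hunks above use in place of iommu_get_domain_for_dev(). A hypothetical driver conversion, with all my_* helpers made up:

static int my_iommu_attach_dev(struct iommu_domain *domain,
			       struct device *dev, struct iommu_domain *old)
{
	/* 'old' may be NULL on the very first attach */
	if (old && old != domain)
		my_iommu_detach(to_my_domain(old), dev);

	return my_iommu_do_attach(to_my_domain(domain), dev);
}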
@@ -10,7 +10,6 @@
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/fwnode.h>
-#include <asm/csr.h>

 #define IMSIC_MMIO_PAGE_SHIFT		12
 #define IMSIC_MMIO_PAGE_SZ		BIT(IMSIC_MMIO_PAGE_SHIFT)

@@ -86,7 +85,7 @@ static inline const struct imsic_global_config *imsic_get_global_config(void)

 #endif

-#ifdef CONFIG_ACPI
+#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_RISCV_IMSIC)
 int imsic_platform_acpi_probe(struct fwnode_handle *fwnode);
 struct fwnode_handle *imsic_acpi_get_fwnode(struct device *dev);
 #else
@ -13,9 +13,6 @@
|
|||
|
||||
static unsigned long HUGEPAGE_SIZE;
|
||||
|
||||
#define MOCK_PAGE_SIZE (PAGE_SIZE / 2)
|
||||
#define MOCK_HUGE_PAGE_SIZE (512 * MOCK_PAGE_SIZE)
|
||||
|
||||
static unsigned long get_huge_page_size(void)
|
||||
{
|
||||
char buf[80];
|
||||
|
|
@ -2058,6 +2055,12 @@ FIXTURE_VARIANT(iommufd_dirty_tracking)
|
|||
|
||||
FIXTURE_SETUP(iommufd_dirty_tracking)
|
||||
{
|
||||
struct iommu_option cmd = {
|
||||
.size = sizeof(cmd),
|
||||
.option_id = IOMMU_OPTION_HUGE_PAGES,
|
||||
.op = IOMMU_OPTION_OP_SET,
|
||||
.val64 = 0,
|
||||
};
|
||||
size_t mmap_buffer_size;
|
||||
unsigned long size;
|
||||
int mmap_flags;
|
||||
|
|
@ -2066,7 +2069,7 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
|
|||
|
||||
if (variant->buffer_size < MOCK_PAGE_SIZE) {
|
||||
SKIP(return,
|
||||
"Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%lu",
|
||||
"Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%u",
|
||||
variant->buffer_size, MOCK_PAGE_SIZE);
|
||||
}
|
||||
|
||||
|
|
@ -2114,16 +2117,18 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
|
|||
assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
|
||||
|
||||
test_ioctl_ioas_alloc(&self->ioas_id);
|
||||
/* Enable 1M mock IOMMU hugepages */
|
||||
if (variant->hugepages) {
|
||||
test_cmd_mock_domain_flags(self->ioas_id,
|
||||
MOCK_FLAGS_DEVICE_HUGE_IOVA,
|
||||
&self->stdev_id, &self->hwpt_id,
|
||||
&self->idev_id);
|
||||
} else {
|
||||
test_cmd_mock_domain(self->ioas_id, &self->stdev_id,
|
||||
&self->hwpt_id, &self->idev_id);
|
||||
}
|
||||
|
||||
/*
|
||||
* For dirty testing it is important that the page size fed into
|
||||
* the iommu page tables matches the size the dirty logic
|
||||
* expects, or set_dirty can touch too much stuff.
|
||||
*/
|
||||
cmd.object_id = self->ioas_id;
|
||||
if (!variant->hugepages)
|
||||
ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
|
||||
|
||||
test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id,
|
||||
&self->idev_id);
|
||||
}
|
||||
|
||||
FIXTURE_TEARDOWN(iommufd_dirty_tracking)
|
||||
|
|
@ -2248,18 +2253,23 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability)
|
|||
TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
|
||||
{
|
||||
uint32_t page_size = MOCK_PAGE_SIZE;
|
||||
uint32_t ioas_id = self->ioas_id;
|
||||
uint32_t hwpt_id;
|
||||
uint32_t ioas_id;
|
||||
|
||||
if (variant->hugepages)
|
||||
page_size = MOCK_HUGE_PAGE_SIZE;
|
||||
|
||||
test_ioctl_ioas_alloc(&ioas_id);
|
||||
test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
|
||||
variant->buffer_size, MOCK_APERTURE_START);
|
||||
|
||||
test_cmd_hwpt_alloc(self->idev_id, ioas_id,
|
||||
IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
|
||||
if (variant->hugepages)
|
||||
test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
|
||||
IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
|
||||
MOCK_IOMMUPT_HUGE, &hwpt_id);
|
||||
else
|
||||
test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
|
||||
IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
|
||||
MOCK_IOMMUPT_DEFAULT, &hwpt_id);
|
||||
|
||||
test_cmd_set_dirty_tracking(hwpt_id, true);
|
||||
|
||||
|
|
@ -2285,18 +2295,24 @@ TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
|
|||
TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear)
|
||||
{
|
||||
uint32_t page_size = MOCK_PAGE_SIZE;
|
||||
uint32_t ioas_id = self->ioas_id;
|
||||
uint32_t hwpt_id;
|
||||
uint32_t ioas_id;
|
||||
|
||||
if (variant->hugepages)
|
||||
page_size = MOCK_HUGE_PAGE_SIZE;
|
||||
|
||||
test_ioctl_ioas_alloc(&ioas_id);
|
||||
test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
|
||||
variant->buffer_size, MOCK_APERTURE_START);
|
||||
|
||||
test_cmd_hwpt_alloc(self->idev_id, ioas_id,
|
||||
IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
|
||||
|
||||
if (variant->hugepages)
|
||||
test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
|
||||
IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
|
||||
MOCK_IOMMUPT_HUGE, &hwpt_id);
|
||||
else
|
||||
test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
|
||||
IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
|
||||
MOCK_IOMMUPT_DEFAULT, &hwpt_id);
|
||||
|
||||
test_cmd_set_dirty_tracking(hwpt_id, true);
|
||||
|
||||
|
|
|
|||
|
|
@@ -215,6 +215,18 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_i
 	ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
 					  hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \
 					  0))
+#define test_cmd_hwpt_alloc_iommupt(device_id, pt_id, flags, iommupt_type, \
+				    hwpt_id)                               \
+	({                                                                 \
+		struct iommu_hwpt_selftest user_cfg = {                    \
+			.pagetable_type = iommupt_type                     \
+		};                                                         \
+									   \
+		ASSERT_EQ(0, _test_cmd_hwpt_alloc(                         \
+				     self->fd, device_id, pt_id, 0, flags, \
+				     hwpt_id, IOMMU_HWPT_DATA_SELFTEST,    \
+				     &user_cfg, sizeof(user_cfg)));        \
+	})
 #define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \
 	EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \
 			     self->fd, device_id, pt_id, 0, flags, \