mirror of https://github.com/torvalds/linux.git
vfio/nvgrace-gpu: register device memory for poison handling
The nvgrace-gpu-vfio-pci module [1] maps the device memory to the user VA (Qemu) using remap_pfn_range() without adding the memory to the kernel. The device memory pages are not backed by struct page. The previous patch implements the mechanism to handle ECC/poison on memory pages without struct page. This new mechanism is being used here. The module registers its memory region and the address_space with the kernel MM for ECC handling using the register_pfn_address_space() registration API exposed by the kernel. Link: https://lore.kernel.org/all/20240220115055.23546-1-ankita@nvidia.com/ [1] Link: https://lkml.kernel.org/r/20251102184434.2406-4-ankita@nvidia.com Signed-off-by: Ankit Agrawal <ankita@nvidia.com> Acked-by: Alex Williamson <alex@shazbot.org> Cc: Aniket Agashe <aniketa@nvidia.com> Cc: Borislav Petkov <bp@alien8.de> Cc: David Hildenbrand <david@redhat.com> Cc: Hanjun Guo <guohanjun@huawei.com> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com> Cc: Kevin Tian <kevin.tian@intel.com> Cc: Kirti Wankhede <kwankhede@nvidia.com> Cc: Len Brown <lenb@kernel.org> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Matthew R. 
Ochs <mochs@nvidia.com> Cc: Mauro Carvalho Chehab <mchehab@kernel.org> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Naoya Horiguchi <nao.horiguchi@gmail.com> Cc: Neo Jia <cjia@nvidia.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Shuai Xue <xueshuai@linux.alibaba.com> Cc: Smita Koralahalli Channabasappa <smita.koralahallichannabasappa@amd.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Tarun Gupta <targupta@nvidia.com> Cc: Uwe Kleine-König <u.kleine-koenig@baylibre.com> Cc: Vikram Sethi <vsethi@nvidia.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Zhi Wang <zhiw@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
2ec4196718
commit
ebb9aeb980
|
|
@ -8,6 +8,10 @@
|
||||||
#include <linux/delay.h>
|
#include <linux/delay.h>
|
||||||
#include <linux/jiffies.h>
|
#include <linux/jiffies.h>
|
||||||
|
|
||||||
|
#ifdef CONFIG_MEMORY_FAILURE
|
||||||
|
#include <linux/memory-failure.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The device memory usable to the workloads running in the VM is cached
|
* The device memory usable to the workloads running in the VM is cached
|
||||||
* and showcased as a 64b device BAR (comprising of BAR4 and BAR5 region)
|
* and showcased as a 64b device BAR (comprising of BAR4 and BAR5 region)
|
||||||
|
|
@ -47,6 +51,9 @@ struct mem_region {
|
||||||
void *memaddr;
|
void *memaddr;
|
||||||
void __iomem *ioaddr;
|
void __iomem *ioaddr;
|
||||||
}; /* Base virtual address of the region */
|
}; /* Base virtual address of the region */
|
||||||
|
#ifdef CONFIG_MEMORY_FAILURE
|
||||||
|
struct pfn_address_space pfn_address_space;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
struct nvgrace_gpu_pci_core_device {
|
struct nvgrace_gpu_pci_core_device {
|
||||||
|
|
@ -60,6 +67,28 @@ struct nvgrace_gpu_pci_core_device {
|
||||||
bool has_mig_hw_bug;
|
bool has_mig_hw_bug;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_MEMORY_FAILURE
|
||||||
|
|
||||||
|
static int
|
||||||
|
nvgrace_gpu_vfio_pci_register_pfn_range(struct mem_region *region,
|
||||||
|
struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
unsigned long nr_pages;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
nr_pages = region->memlength >> PAGE_SHIFT;
|
||||||
|
|
||||||
|
region->pfn_address_space.node.start = vma->vm_pgoff;
|
||||||
|
region->pfn_address_space.node.last = vma->vm_pgoff + nr_pages - 1;
|
||||||
|
region->pfn_address_space.mapping = vma->vm_file->f_mapping;
|
||||||
|
|
||||||
|
ret = register_pfn_address_space(®ion->pfn_address_space);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
|
static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
|
||||||
{
|
{
|
||||||
struct nvgrace_gpu_pci_core_device *nvdev =
|
struct nvgrace_gpu_pci_core_device *nvdev =
|
||||||
|
|
@ -127,6 +156,13 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
|
||||||
|
|
||||||
mutex_destroy(&nvdev->remap_lock);
|
mutex_destroy(&nvdev->remap_lock);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MEMORY_FAILURE
|
||||||
|
if (nvdev->resmem.memlength)
|
||||||
|
unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
|
||||||
|
|
||||||
|
unregister_pfn_address_space(&nvdev->usemem.pfn_address_space);
|
||||||
|
#endif
|
||||||
|
|
||||||
vfio_pci_core_close_device(core_vdev);
|
vfio_pci_core_close_device(core_vdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -202,7 +238,14 @@ static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
|
||||||
|
|
||||||
vma->vm_pgoff = start_pfn;
|
vma->vm_pgoff = start_pfn;
|
||||||
|
|
||||||
return 0;
|
#ifdef CONFIG_MEMORY_FAILURE
|
||||||
|
if (nvdev->resmem.memlength && index == VFIO_PCI_BAR2_REGION_INDEX)
|
||||||
|
ret = nvgrace_gpu_vfio_pci_register_pfn_range(&nvdev->resmem, vma);
|
||||||
|
else if (index == VFIO_PCI_BAR4_REGION_INDEX)
|
||||||
|
ret = nvgrace_gpu_vfio_pci_register_pfn_range(&nvdev->usemem, vma);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static long
|
static long
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue