linux/drivers/vfio/pci/vfio_pci_dmabuf.c

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
*/
#include <linux/dma-buf-mapping.h>
#include <linux/pci-p2pdma.h>
#include <linux/dma-resv.h>

#include "vfio_pci_priv.h"

MODULE_IMPORT_NS("DMA_BUF");
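
/*
 * Export slices of a PCI BAR as revocable DMA-BUFs so that peer devices
 * can DMA directly to device memory (P2PDMA). There are no struct pages
 * behind the exported buffer; importers receive a vector of physical
 * address ranges and therefore must support peer2peer attachments.
 */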

struct vfio_pci_dma_buf {
	struct dma_buf *dmabuf;
	struct vfio_pci_core_device *vdev;
	struct list_head dmabufs_elm;
	size_t size;
	struct dma_buf_phys_vec *phys_vec;
	struct p2pdma_provider *provider;
	u32 nr_ranges;
	u8 revoked : 1;
};
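
/*
 * Only peer2peer-capable importers may attach: this buffer has no struct
 * pages, so it can only be reached by device DMA. Attachment is refused
 * while the buffer is revoked.
 */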
static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf,
				   struct dma_buf_attachment *attachment)
{
	struct vfio_pci_dma_buf *priv = dmabuf->priv;

	if (!attachment->peer2peer)
		return -EOPNOTSUPP;

	if (priv->revoked)
		return -ENODEV;

	return 0;
}
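
/*
 * Runs with the dma_resv lock held. A revoked buffer cannot be mapped;
 * importers learn about revocation through dma_buf_move_notify() and
 * must retry the mapping once access has been restored.
 */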
static struct sg_table *
vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
		     enum dma_data_direction dir)
{
	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;

	dma_resv_assert_held(priv->dmabuf->resv);

	if (priv->revoked)
		return ERR_PTR(-ENODEV);

	return dma_buf_phys_vec_to_sgt(attachment, priv->provider,
				       priv->phys_vec, priv->nr_ranges,
				       priv->size, dir);
}

static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
				   struct sg_table *sgt,
				   enum dma_data_direction dir)
{
	dma_buf_free_sgt(attachment, sgt, dir);
}
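
/*
 * Runs when the last dmabuf reference is dropped. If priv->vdev is still
 * set, this side unlinks the buffer from the device list; otherwise
 * vfio_pci_dma_buf_cleanup() already did so and cleared priv->vdev.
 */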
static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf)
{
	struct vfio_pci_dma_buf *priv = dmabuf->priv;

	/*
	 * Either this or vfio_pci_dma_buf_cleanup() will remove from the
	 * list. The refcount prevents both.
	 */
	if (priv->vdev) {
		down_write(&priv->vdev->memory_lock);
		list_del_init(&priv->dmabufs_elm);
		up_write(&priv->vdev->memory_lock);
		vfio_device_put_registration(&priv->vdev->vdev);
	}
	kfree(priv->phys_vec);
	kfree(priv);
}

static const struct dma_buf_ops vfio_pci_dmabuf_ops = {
	.attach = vfio_pci_dma_buf_attach,
	.map_dma_buf = vfio_pci_dma_buf_map,
	.unmap_dma_buf = vfio_pci_dma_buf_unmap,
	.release = vfio_pci_dma_buf_release,
};
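
/*
 * Convert user-supplied (offset, length) pairs, given relative to a
 * region starting at @start and spanning @len bytes, into absolute
 * physical ranges. Zero-length ranges, arithmetic overflow, and ranges
 * extending past the end of the region are rejected.
 */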
int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
				struct vfio_region_dma_range *dma_ranges,
				size_t nr_ranges, phys_addr_t start,
				phys_addr_t len)
{
	phys_addr_t max_addr;
	unsigned int i;

	max_addr = start + len;
	for (i = 0; i < nr_ranges; i++) {
		phys_addr_t end;

		if (!dma_ranges[i].length)
			return -EINVAL;

		if (check_add_overflow(start, dma_ranges[i].offset,
				       &phys_vec[i].paddr) ||
		    check_add_overflow(phys_vec[i].paddr,
				       dma_ranges[i].length, &end))
			return -EOVERFLOW;
		if (end > max_addr)
			return -EINVAL;

		phys_vec[i].len = dma_ranges[i].length;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_fill_phys_vec);
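
/*
 * Default get_dmabuf_phys implementation for plain PCI BARs: look up the
 * device's P2PDMA provider for the BAR and fill the physical vector from
 * the BAR's resource. Variant drivers with a different layout can supply
 * their own pci_ops callback and call vfio_pci_core_fill_phys_vec()
 * against their own ranges.
 */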
int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
				  struct p2pdma_provider **provider,
				  unsigned int region_index,
				  struct dma_buf_phys_vec *phys_vec,
				  struct vfio_region_dma_range *dma_ranges,
				  size_t nr_ranges)
{
	struct pci_dev *pdev = vdev->pdev;

	*provider = pcim_p2pdma_provider(pdev, region_index);
	if (!*provider)
		return -EINVAL;

	return vfio_pci_core_fill_phys_vec(
		phys_vec, dma_ranges, nr_ranges,
		pci_resource_start(pdev, region_index),
		pci_resource_len(pdev, region_index));
}
EXPORT_SYMBOL_GPL(vfio_pci_core_get_dmabuf_phys);
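
/*
 * Sanity-check the user-provided ranges: each must be non-empty and page
 * aligned, and the summed length must not overflow. The check against
 * DMA_IOVA_USE_SWIOTLB rejects totals that would set the bit the DMA
 * API's IOVA allocator reserves for itself, which dma_iova_try_alloc()
 * would otherwise WARN about.
 */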
static int validate_dmabuf_input(struct vfio_device_feature_dma_buf *dma_buf,
				 struct vfio_region_dma_range *dma_ranges,
				 size_t *lengthp)
{
	size_t length = 0;
	u32 i;

	for (i = 0; i < dma_buf->nr_ranges; i++) {
		u64 offset = dma_ranges[i].offset;
		u64 len = dma_ranges[i].length;

		if (!len || !PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
			return -EINVAL;

		if (check_add_overflow(length, len, &length))
			return -EINVAL;
	}

	/*
	 * dma_iova_try_alloc() will WARN if userspace proposes a size that
	 * is too big, e.g. with lots of ranges.
	 */
	if ((u64)length & DMA_IOVA_USE_SWIOTLB)
		return -EINVAL;

	*lengthp = length;
	return 0;
}
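
/*
 * VFIO_DEVICE_FEATURE handler that exports BAR ranges as a dmabuf fd.
 *
 * A userspace sketch, assuming the uapi added together with this file
 * (VFIO_DEVICE_FEATURE_DMA_BUF and the struct layouts below come from
 * that series; the field names match what this handler reads):
 *
 *	char buf[sizeof(struct vfio_device_feature) +
 *		 sizeof(struct vfio_device_feature_dma_buf) +
 *		 sizeof(struct vfio_region_dma_range)] = {};
 *	struct vfio_device_feature *hdr = (void *)buf;
 *	struct vfio_device_feature_dma_buf *get = (void *)hdr->data;
 *	int fd;
 *
 *	hdr->argsz = sizeof(buf);
 *	hdr->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_BUF;
 *	get->region_index = 0;			// BAR 0
 *	get->open_flags = O_CLOEXEC;
 *	get->nr_ranges = 1;
 *	get->dma_ranges[0].offset = 0;
 *	get->dma_ranges[0].length = 0x10000;	// must be page aligned
 *	fd = ioctl(device_fd, VFIO_DEVICE_FEATURE, hdr);
 *
 * On success fd refers to a dmabuf covering the requested BAR ranges; it
 * is revoked whenever the device's MMIO space is disabled.
 */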
int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
				  struct vfio_device_feature_dma_buf __user *arg,
				  size_t argsz)
{
	struct vfio_device_feature_dma_buf get_dma_buf = {};
	struct vfio_region_dma_range *dma_ranges;
	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
	struct vfio_pci_dma_buf *priv;
	size_t length;
	int ret;

	if (!vdev->pci_ops || !vdev->pci_ops->get_dmabuf_phys)
		return -EOPNOTSUPP;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(get_dma_buf));
	if (ret != 1)
		return ret;

	if (copy_from_user(&get_dma_buf, arg, sizeof(get_dma_buf)))
		return -EFAULT;

	if (!get_dma_buf.nr_ranges || get_dma_buf.flags)
		return -EINVAL;

	/*
	 * For PCI the region_index is the BAR number like everything else.
	 */
	if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX)
		return -ENODEV;

	dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
				       sizeof(*dma_ranges));
	if (IS_ERR(dma_ranges))
		return PTR_ERR(dma_ranges);

	ret = validate_dmabuf_input(&get_dma_buf, dma_ranges, &length);
	if (ret)
		goto err_free_ranges;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv) {
		ret = -ENOMEM;
		goto err_free_ranges;
	}

	priv->phys_vec = kcalloc(get_dma_buf.nr_ranges, sizeof(*priv->phys_vec),
				 GFP_KERNEL);
	if (!priv->phys_vec) {
		ret = -ENOMEM;
		goto err_free_priv;
	}

	priv->vdev = vdev;
	priv->nr_ranges = get_dma_buf.nr_ranges;
	priv->size = length;

	ret = vdev->pci_ops->get_dmabuf_phys(vdev, &priv->provider,
					     get_dma_buf.region_index,
					     priv->phys_vec, dma_ranges,
					     priv->nr_ranges);
	if (ret)
		goto err_free_phys;

	kfree(dma_ranges);
	dma_ranges = NULL;

	if (!vfio_device_try_get_registration(&vdev->vdev)) {
		ret = -ENODEV;
		goto err_free_phys;
	}

	exp_info.ops = &vfio_pci_dmabuf_ops;
	exp_info.size = priv->size;
	exp_info.flags = get_dma_buf.open_flags;
	exp_info.priv = priv;

	priv->dmabuf = dma_buf_export(&exp_info);
	if (IS_ERR(priv->dmabuf)) {
		ret = PTR_ERR(priv->dmabuf);
		goto err_dev_put;
	}

	/* dma_buf_put() now frees priv */
	INIT_LIST_HEAD(&priv->dmabufs_elm);
	down_write(&vdev->memory_lock);
	dma_resv_lock(priv->dmabuf->resv, NULL);
	priv->revoked = !__vfio_pci_memory_enabled(vdev);
	list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs);
	dma_resv_unlock(priv->dmabuf->resv);
	up_write(&vdev->memory_lock);

	/*
	 * dma_buf_fd() consumes the reference; when the file closes, the
	 * dmabuf will be released.
	 */
	ret = dma_buf_fd(priv->dmabuf, get_dma_buf.open_flags);
	if (ret < 0)
		goto err_dma_buf;

	return ret;

err_dma_buf:
	/* The release callback frees priv, so don't fall through below */
	dma_buf_put(priv->dmabuf);
	return ret;

err_dev_put:
	vfio_device_put_registration(&vdev->vdev);
err_free_phys:
	kfree(priv->phys_vec);
err_free_priv:
	kfree(priv);
err_free_ranges:
	kfree(dma_ranges);
	return ret;
}
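
/*
 * Flip the revoked state of every dmabuf attached to @vdev and notify
 * importers. Called with memory_lock held for write when BAR access
 * changes, e.g. when PCI memory decode is disabled or restored, so that
 * importers never touch a BAR the device is not decoding.
 */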
void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
{
	struct vfio_pci_dma_buf *priv;
	struct vfio_pci_dma_buf *tmp;

	lockdep_assert_held_write(&vdev->memory_lock);

	list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
		if (!get_file_active(&priv->dmabuf->file))
			continue;

		if (priv->revoked != revoked) {
			dma_resv_lock(priv->dmabuf->resv, NULL);
			priv->revoked = revoked;
			dma_buf_move_notify(priv->dmabuf);
			dma_resv_unlock(priv->dmabuf->resv);
		}
		fput(priv->dmabuf->file);
	}
}
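
/*
 * Device teardown: permanently revoke every outstanding dmabuf and
 * detach it from the vdev. The buffers keep their own lifetime; the
 * final dma_buf_put() frees them via vfio_pci_dma_buf_release(), which
 * sees priv->vdev == NULL and skips the list removal done here.
 */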
void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
{
	struct vfio_pci_dma_buf *priv;
	struct vfio_pci_dma_buf *tmp;

	down_write(&vdev->memory_lock);
	list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
		if (!get_file_active(&priv->dmabuf->file))
			continue;

		dma_resv_lock(priv->dmabuf->resv, NULL);
		list_del_init(&priv->dmabufs_elm);
		priv->vdev = NULL;
		priv->revoked = true;
		dma_buf_move_notify(priv->dmabuf);
		dma_resv_unlock(priv->dmabuf->resv);
		vfio_device_put_registration(&vdev->vdev);
		fput(priv->dmabuf->file);
	}
	up_write(&vdev->memory_lock);
}