|  | // SPDX-License-Identifier: GPL-2.0 | 
|  | /* | 
|  | * Copyright (C) 2018-2020 Intel Corporation. | 
|  | * Copyright (C) 2020 Red Hat, Inc. | 
|  | * | 
|  | * Author: Tiwei Bie <tiwei.bie@intel.com> | 
|  | *         Jason Wang <jasowang@redhat.com> | 
|  | * | 
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and thanks to Cunming Liang and Zhihong Wang for
 * all their support.
|  | */ | 
|  |  | 
|  | #include <linux/kernel.h> | 
|  | #include <linux/module.h> | 
|  | #include <linux/cdev.h> | 
|  | #include <linux/device.h> | 
|  | #include <linux/mm.h> | 
|  | #include <linux/slab.h> | 
|  | #include <linux/iommu.h> | 
|  | #include <linux/uuid.h> | 
|  | #include <linux/vdpa.h> | 
|  | #include <linux/nospec.h> | 
|  | #include <linux/vhost.h> | 
|  |  | 
|  | #include "vhost.h" | 
|  |  | 
|  | enum { | 
|  | VHOST_VDPA_BACKEND_FEATURES = | 
|  | (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) | | 
|  | (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) | | 
|  | (1ULL << VHOST_BACKEND_F_IOTLB_ASID), | 
|  | }; | 
|  |  | 
|  | #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) | 
|  |  | 
|  | #define VHOST_VDPA_IOTLB_BUCKETS 16 | 
|  |  | 
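/*
 * One IOTLB (address space) per ASID. Entries are kept in the
 * per-device hash table vhost_vdpa->as[], keyed by ASID.
 */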
|  | struct vhost_vdpa_as { | 
|  | struct hlist_node hash_link; | 
|  | struct vhost_iotlb iotlb; | 
|  | u32 id; | 
|  | }; | 
|  |  | 
|  | struct vhost_vdpa { | 
|  | struct vhost_dev vdev; | 
|  | struct iommu_domain *domain; | 
|  | struct vhost_virtqueue *vqs; | 
|  | struct completion completion; | 
|  | struct vdpa_device *vdpa; | 
|  | struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS]; | 
|  | struct device dev; | 
|  | struct cdev cdev; | 
|  | atomic_t opened; | 
|  | u32 nvqs; | 
|  | int virtio_id; | 
|  | int minor; | 
|  | struct eventfd_ctx *config_ctx; | 
|  | int in_batch; | 
|  | struct vdpa_iova_range range; | 
|  | u32 batch_asid; | 
|  | }; | 
|  |  | 
|  | static DEFINE_IDA(vhost_vdpa_ida); | 
|  |  | 
|  | static dev_t vhost_vdpa_major; | 
|  |  | 
|  | static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, | 
|  | struct vhost_iotlb *iotlb, u64 start, | 
|  | u64 last, u32 asid); | 
|  |  | 
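/* Recover the ASID from the address space containing this IOTLB. */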
|  | static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) | 
|  | { | 
|  | struct vhost_vdpa_as *as = container_of(iotlb, struct | 
|  | vhost_vdpa_as, iotlb); | 
|  | return as->id; | 
|  | } | 
|  |  | 
|  | static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid) | 
|  | { | 
|  | struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; | 
|  | struct vhost_vdpa_as *as; | 
|  |  | 
|  | hlist_for_each_entry(as, head, hash_link) | 
|  | if (as->id == asid) | 
|  | return as; | 
|  |  | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid) | 
|  | { | 
|  | struct vhost_vdpa_as *as = asid_to_as(v, asid); | 
|  |  | 
|  | if (!as) | 
|  | return NULL; | 
|  |  | 
|  | return &as->iotlb; | 
|  | } | 
|  |  | 
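/*
 * Allocate a new address space for @asid. Returns NULL if the ASID
 * already exists or exceeds the number of address spaces advertised
 * by the parent device (vdpa->nas).
 */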
|  | static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid) | 
|  | { | 
|  | struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; | 
|  | struct vhost_vdpa_as *as; | 
|  |  | 
|  | if (asid_to_as(v, asid)) | 
|  | return NULL; | 
|  |  | 
|  | if (asid >= v->vdpa->nas) | 
|  | return NULL; | 
|  |  | 
|  | as = kmalloc(sizeof(*as), GFP_KERNEL); | 
|  | if (!as) | 
|  | return NULL; | 
|  |  | 
|  | vhost_iotlb_init(&as->iotlb, 0, 0); | 
|  | as->id = asid; | 
|  | hlist_add_head(&as->hash_link, head); | 
|  |  | 
|  | return as; | 
|  | } | 
|  |  | 
|  | static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, | 
|  | u32 asid) | 
|  | { | 
|  | struct vhost_vdpa_as *as = asid_to_as(v, asid); | 
|  |  | 
|  | if (as) | 
|  | return as; | 
|  |  | 
|  | return vhost_vdpa_alloc_as(v, asid); | 
|  | } | 
|  |  | 
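/*
 * Give parents with a vendor-specific IOMMU (->reset_map) a chance to
 * restore the mapping for @asid to its initial/default state.
 */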
|  | static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | if (ops->reset_map) | 
|  | ops->reset_map(vdpa, asid); | 
|  | } | 
|  |  | 
|  | static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) | 
|  | { | 
|  | struct vhost_vdpa_as *as = asid_to_as(v, asid); | 
|  |  | 
|  | if (!as) | 
|  | return -EINVAL; | 
|  |  | 
|  | hlist_del(&as->hash_link); | 
|  | vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid); | 
	/*
	 * Devices with a vendor-specific IOMMU may need to restore the
	 * iotlb to its initial or default state, which cannot be done by
	 * the whole-range unmap call above. Give them a chance to clean
	 * up or reset the map to the desired state.
	 */
|  | vhost_vdpa_reset_map(v, asid); | 
|  | kfree(as); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
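/* vhost work item: forward a virtqueue kick to the parent vDPA device. */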
|  | static void handle_vq_kick(struct vhost_work *work) | 
|  | { | 
|  | struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, | 
|  | poll.work); | 
|  | struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev); | 
|  | const struct vdpa_config_ops *ops = v->vdpa->config; | 
|  |  | 
|  | ops->kick_vq(v->vdpa, vq - v->vqs); | 
|  | } | 
|  |  | 
|  | static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) | 
|  | { | 
|  | struct vhost_virtqueue *vq = private; | 
|  | struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; | 
|  |  | 
|  | if (call_ctx) | 
|  | eventfd_signal(call_ctx, 1); | 
|  |  | 
|  | return IRQ_HANDLED; | 
|  | } | 
|  |  | 
|  | static irqreturn_t vhost_vdpa_config_cb(void *private) | 
|  | { | 
|  | struct vhost_vdpa *v = private; | 
|  | struct eventfd_ctx *config_ctx = v->config_ctx; | 
|  |  | 
|  | if (config_ctx) | 
|  | eventfd_signal(config_ctx, 1); | 
|  |  | 
|  | return IRQ_HANDLED; | 
|  | } | 
|  |  | 
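/*
 * If the parent device exposes a per-virtqueue interrupt, register an
 * irq bypass producer with the vq's call eventfd as token, so that a
 * matching consumer (e.g. KVM's irqfd) can take over signalling
 * directly from the device interrupt.
 */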
|  | static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) | 
|  | { | 
|  | struct vhost_virtqueue *vq = &v->vqs[qid]; | 
|  | const struct vdpa_config_ops *ops = v->vdpa->config; | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | int ret, irq; | 
|  |  | 
|  | if (!ops->get_vq_irq) | 
|  | return; | 
|  |  | 
|  | irq = ops->get_vq_irq(vdpa, qid); | 
|  | if (irq < 0) | 
|  | return; | 
|  |  | 
|  | irq_bypass_unregister_producer(&vq->call_ctx.producer); | 
|  | if (!vq->call_ctx.ctx) | 
|  | return; | 
|  |  | 
|  | vq->call_ctx.producer.token = vq->call_ctx.ctx; | 
|  | vq->call_ctx.producer.irq = irq; | 
|  | ret = irq_bypass_register_producer(&vq->call_ctx.producer); | 
|  | if (unlikely(ret)) | 
|  | dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret =  %d\n", | 
|  | qid, vq->call_ctx.producer.token, ret); | 
|  | } | 
|  |  | 
|  | static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) | 
|  | { | 
|  | struct vhost_virtqueue *vq = &v->vqs[qid]; | 
|  |  | 
|  | irq_bypass_unregister_producer(&vq->call_ctx.producer); | 
|  | } | 
|  |  | 
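/*
 * Reset the device. Unless userspace negotiated
 * VHOST_BACKEND_F_IOTLB_PERSIST, also ask the parent to clean its
 * mappings (VDPA_RESET_F_CLEAN_MAP) as part of the reset.
 */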
|  | static int _compat_vdpa_reset(struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | u32 flags = 0; | 
|  |  | 
|  | if (v->vdev.vqs) { | 
|  | flags |= !vhost_backend_has_feature(v->vdev.vqs[0], | 
|  | VHOST_BACKEND_F_IOTLB_PERSIST) ? | 
|  | VDPA_RESET_F_CLEAN_MAP : 0; | 
|  | } | 
|  |  | 
|  | return vdpa_reset(vdpa, flags); | 
|  | } | 
|  |  | 
|  | static int vhost_vdpa_reset(struct vhost_vdpa *v) | 
|  | { | 
|  | v->in_batch = 0; | 
|  | return _compat_vdpa_reset(v); | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | if (!vdpa->use_va || !ops->bind_mm) | 
|  | return 0; | 
|  |  | 
|  | return ops->bind_mm(vdpa, v->vdev.mm); | 
|  | } | 
|  |  | 
|  | static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | if (!vdpa->use_va || !ops->unbind_mm) | 
|  | return; | 
|  |  | 
|  | ops->unbind_mm(vdpa); | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | u32 device_id; | 
|  |  | 
|  | device_id = ops->get_device_id(vdpa); | 
|  |  | 
|  | if (copy_to_user(argp, &device_id, sizeof(device_id))) | 
|  | return -EFAULT; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | u8 status; | 
|  |  | 
|  | status = ops->get_status(vdpa); | 
|  |  | 
|  | if (copy_to_user(statusp, &status, sizeof(status))) | 
|  | return -EFAULT; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
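/*
 * VHOST_VDPA_SET_STATUS: status bits may only be added, never removed,
 * except for writing 0 which performs a full device reset. Virtqueue
 * interrupt bypass is torn down when DRIVER_OK is cleared and set up
 * once DRIVER_OK becomes set.
 */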
|  | static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | u8 status, status_old; | 
|  | u32 nvqs = v->nvqs; | 
|  | int ret; | 
|  | u16 i; | 
|  |  | 
|  | if (copy_from_user(&status, statusp, sizeof(status))) | 
|  | return -EFAULT; | 
|  |  | 
|  | status_old = ops->get_status(vdpa); | 
|  |  | 
	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
|  | if (status != 0 && (status_old & ~status) != 0) | 
|  | return -EINVAL; | 
|  |  | 
|  | if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK)) | 
|  | for (i = 0; i < nvqs; i++) | 
|  | vhost_vdpa_unsetup_vq_irq(v, i); | 
|  |  | 
|  | if (status == 0) { | 
|  | ret = _compat_vdpa_reset(v); | 
|  | if (ret) | 
|  | return ret; | 
|  | } else | 
|  | vdpa_set_status(vdpa, status); | 
|  |  | 
|  | if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) | 
|  | for (i = 0; i < nvqs; i++) | 
|  | vhost_vdpa_setup_vq_irq(v, i); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int vhost_vdpa_config_validate(struct vhost_vdpa *v, | 
|  | struct vhost_vdpa_config *c) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | size_t size = vdpa->config->get_config_size(vdpa); | 
|  |  | 
|  | if (c->len == 0 || c->off > size) | 
|  | return -EINVAL; | 
|  |  | 
|  | if (c->len > size - c->off) | 
|  | return -E2BIG; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_get_config(struct vhost_vdpa *v, | 
|  | struct vhost_vdpa_config __user *c) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | struct vhost_vdpa_config config; | 
|  | unsigned long size = offsetof(struct vhost_vdpa_config, buf); | 
|  | u8 *buf; | 
|  |  | 
|  | if (copy_from_user(&config, c, size)) | 
|  | return -EFAULT; | 
|  | if (vhost_vdpa_config_validate(v, &config)) | 
|  | return -EINVAL; | 
|  | buf = kvzalloc(config.len, GFP_KERNEL); | 
|  | if (!buf) | 
|  | return -ENOMEM; | 
|  |  | 
|  | vdpa_get_config(vdpa, config.off, buf, config.len); | 
|  |  | 
|  | if (copy_to_user(c->buf, buf, config.len)) { | 
|  | kvfree(buf); | 
|  | return -EFAULT; | 
|  | } | 
|  |  | 
|  | kvfree(buf); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_set_config(struct vhost_vdpa *v, | 
|  | struct vhost_vdpa_config __user *c) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | struct vhost_vdpa_config config; | 
|  | unsigned long size = offsetof(struct vhost_vdpa_config, buf); | 
|  | u8 *buf; | 
|  |  | 
|  | if (copy_from_user(&config, c, size)) | 
|  | return -EFAULT; | 
|  | if (vhost_vdpa_config_validate(v, &config)) | 
|  | return -EINVAL; | 
|  |  | 
|  | buf = vmemdup_user(c->buf, config.len); | 
|  | if (IS_ERR(buf)) | 
|  | return PTR_ERR(buf); | 
|  |  | 
|  | vdpa_set_config(vdpa, config.off, buf, config.len); | 
|  |  | 
|  | kvfree(buf); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | return ops->suspend; | 
|  | } | 
|  |  | 
|  | static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | return ops->resume; | 
|  | } | 
|  |  | 
|  | static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | return ops->get_vq_desc_group; | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | u64 features; | 
|  |  | 
|  | features = ops->get_device_features(vdpa); | 
|  |  | 
|  | if (copy_to_user(featurep, &features, sizeof(features))) | 
|  | return -EFAULT; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | if (!ops->get_backend_features) | 
|  | return 0; | 
|  | else | 
|  | return ops->get_backend_features(vdpa); | 
|  | } | 
|  |  | 
|  | static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | return (!ops->set_map && !ops->dma_map) || ops->reset_map || | 
|  | vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST); | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | struct vhost_dev *d = &v->vdev; | 
|  | u64 actual_features; | 
|  | u64 features; | 
|  | int i; | 
|  |  | 
|  | /* | 
|  | * It's not allowed to change the features after they have | 
|  | * been negotiated. | 
|  | */ | 
|  | if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK) | 
|  | return -EBUSY; | 
|  |  | 
|  | if (copy_from_user(&features, featurep, sizeof(features))) | 
|  | return -EFAULT; | 
|  |  | 
|  | if (vdpa_set_features(vdpa, features)) | 
|  | return -EINVAL; | 
|  |  | 
|  | /* let the vqs know what has been configured */ | 
|  | actual_features = ops->get_driver_features(vdpa); | 
|  | for (i = 0; i < d->nvqs; ++i) { | 
|  | struct vhost_virtqueue *vq = d->vqs[i]; | 
|  |  | 
|  | mutex_lock(&vq->mutex); | 
|  | vq->acked_features = actual_features; | 
|  | mutex_unlock(&vq->mutex); | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | u16 num; | 
|  |  | 
|  | num = ops->get_vq_num_max(vdpa); | 
|  |  | 
|  | if (copy_to_user(argp, &num, sizeof(num))) | 
|  | return -EFAULT; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void vhost_vdpa_config_put(struct vhost_vdpa *v) | 
|  | { | 
|  | if (v->config_ctx) { | 
|  | eventfd_ctx_put(v->config_ctx); | 
|  | v->config_ctx = NULL; | 
|  | } | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) | 
|  | { | 
|  | struct vdpa_callback cb; | 
|  | int fd; | 
|  | struct eventfd_ctx *ctx; | 
|  |  | 
|  | cb.callback = vhost_vdpa_config_cb; | 
|  | cb.private = v; | 
|  | if (copy_from_user(&fd, argp, sizeof(fd))) | 
|  | return  -EFAULT; | 
|  |  | 
|  | ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); | 
|  | swap(ctx, v->config_ctx); | 
|  |  | 
|  | if (!IS_ERR_OR_NULL(ctx)) | 
|  | eventfd_ctx_put(ctx); | 
|  |  | 
|  | if (IS_ERR(v->config_ctx)) { | 
|  | long ret = PTR_ERR(v->config_ctx); | 
|  |  | 
|  | v->config_ctx = NULL; | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | v->vdpa->config->set_config_cb(v->vdpa, &cb); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp) | 
|  | { | 
|  | struct vhost_vdpa_iova_range range = { | 
|  | .first = v->range.first, | 
|  | .last = v->range.last, | 
|  | }; | 
|  |  | 
|  | if (copy_to_user(argp, &range, sizeof(range))) | 
|  | return -EFAULT; | 
|  | return 0; | 
|  | } | 
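
/*
 * Minimal userspace sketch (not part of this driver) of querying the
 * usable IOVA range, assuming "fd" is an open /dev/vhost-vdpa-N file
 * descriptor:
 *
 *	struct vhost_vdpa_iova_range range;
 *
 *	if (!ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, &range))
 *		printf("iova range: [0x%llx, 0x%llx]\n", range.first, range.last);
 */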
|  |  | 
|  | static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | u32 size; | 
|  |  | 
|  | size = ops->get_config_size(vdpa); | 
|  |  | 
|  | if (copy_to_user(argp, &size, sizeof(size))) | 
|  | return -EFAULT; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  |  | 
|  | if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs))) | 
|  | return -EFAULT; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
/* After a successful return of this ioctl the device must not process more
 * virtqueue descriptors. The device can answer reads or writes of config
 * fields as if it were not suspended. In particular, writing to "queue_enable"
 * with a value of 1 will not make the device start processing buffers.
 */
|  | static long vhost_vdpa_suspend(struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | if (!ops->suspend) | 
|  | return -EOPNOTSUPP; | 
|  |  | 
|  | return ops->suspend(vdpa); | 
|  | } | 
|  |  | 
|  | /* After a successful return of this ioctl the device resumes processing | 
|  | * virtqueue descriptors. The device becomes fully operational the same way it | 
|  | * was before it was suspended. | 
|  | */ | 
|  | static long vhost_vdpa_resume(struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | if (!ops->resume) | 
|  | return -EOPNOTSUPP; | 
|  |  | 
|  | return ops->resume(vdpa); | 
|  | } | 
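
/*
 * Minimal userspace sketch (not part of this driver), assuming "fd" is
 * an open /dev/vhost-vdpa-N descriptor and the backend advertised
 * VHOST_BACKEND_F_SUSPEND / VHOST_BACKEND_F_RESUME:
 *
 *	if (ioctl(fd, VHOST_VDPA_SUSPEND))
 *		perror("VHOST_VDPA_SUSPEND");
 *	// ... save device state, migrate, etc. ...
 *	if (ioctl(fd, VHOST_VDPA_RESUME))
 *		perror("VHOST_VDPA_RESUME");
 */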
|  |  | 
|  | static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, | 
|  | void __user *argp) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | struct vdpa_vq_state vq_state; | 
|  | struct vdpa_callback cb; | 
|  | struct vhost_virtqueue *vq; | 
|  | struct vhost_vring_state s; | 
|  | u32 idx; | 
|  | long r; | 
|  |  | 
|  | r = get_user(idx, (u32 __user *)argp); | 
|  | if (r < 0) | 
|  | return r; | 
|  |  | 
|  | if (idx >= v->nvqs) | 
|  | return -ENOBUFS; | 
|  |  | 
|  | idx = array_index_nospec(idx, v->nvqs); | 
|  | vq = &v->vqs[idx]; | 
|  |  | 
|  | switch (cmd) { | 
|  | case VHOST_VDPA_SET_VRING_ENABLE: | 
|  | if (copy_from_user(&s, argp, sizeof(s))) | 
|  | return -EFAULT; | 
|  | ops->set_vq_ready(vdpa, idx, s.num); | 
|  | return 0; | 
|  | case VHOST_VDPA_GET_VRING_GROUP: | 
|  | if (!ops->get_vq_group) | 
|  | return -EOPNOTSUPP; | 
|  | s.index = idx; | 
|  | s.num = ops->get_vq_group(vdpa, idx); | 
|  | if (s.num >= vdpa->ngroups) | 
|  | return -EIO; | 
|  | else if (copy_to_user(argp, &s, sizeof(s))) | 
|  | return -EFAULT; | 
|  | return 0; | 
|  | case VHOST_VDPA_GET_VRING_DESC_GROUP: | 
|  | if (!vhost_vdpa_has_desc_group(v)) | 
|  | return -EOPNOTSUPP; | 
|  | s.index = idx; | 
|  | s.num = ops->get_vq_desc_group(vdpa, idx); | 
|  | if (s.num >= vdpa->ngroups) | 
|  | return -EIO; | 
|  | else if (copy_to_user(argp, &s, sizeof(s))) | 
|  | return -EFAULT; | 
|  | return 0; | 
|  | case VHOST_VDPA_SET_GROUP_ASID: | 
|  | if (copy_from_user(&s, argp, sizeof(s))) | 
|  | return -EFAULT; | 
|  | if (s.num >= vdpa->nas) | 
|  | return -EINVAL; | 
|  | if (!ops->set_group_asid) | 
|  | return -EOPNOTSUPP; | 
|  | return ops->set_group_asid(vdpa, idx, s.num); | 
|  | case VHOST_GET_VRING_BASE: | 
|  | r = ops->get_vq_state(v->vdpa, idx, &vq_state); | 
|  | if (r) | 
|  | return r; | 
|  |  | 
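		/*
		 * For packed virtqueues, the 15-bit last_avail/last_used
		 * indices and their wrap counters are combined into single
		 * 16-bit values, with the wrap counter stored in bit 15.
		 */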
|  | if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { | 
|  | vq->last_avail_idx = vq_state.packed.last_avail_idx | | 
|  | (vq_state.packed.last_avail_counter << 15); | 
|  | vq->last_used_idx = vq_state.packed.last_used_idx | | 
|  | (vq_state.packed.last_used_counter << 15); | 
|  | } else { | 
|  | vq->last_avail_idx = vq_state.split.avail_index; | 
|  | } | 
|  | break; | 
|  | } | 
|  |  | 
|  | r = vhost_vring_ioctl(&v->vdev, cmd, argp); | 
|  | if (r) | 
|  | return r; | 
|  |  | 
|  | switch (cmd) { | 
|  | case VHOST_SET_VRING_ADDR: | 
|  | if (ops->set_vq_address(vdpa, idx, | 
|  | (u64)(uintptr_t)vq->desc, | 
|  | (u64)(uintptr_t)vq->avail, | 
|  | (u64)(uintptr_t)vq->used)) | 
|  | r = -EINVAL; | 
|  | break; | 
|  |  | 
|  | case VHOST_SET_VRING_BASE: | 
|  | if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { | 
|  | vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff; | 
|  | vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000); | 
|  | vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff; | 
|  | vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000); | 
|  | } else { | 
|  | vq_state.split.avail_index = vq->last_avail_idx; | 
|  | } | 
|  | r = ops->set_vq_state(vdpa, idx, &vq_state); | 
|  | break; | 
|  |  | 
|  | case VHOST_SET_VRING_CALL: | 
|  | if (vq->call_ctx.ctx) { | 
|  | cb.callback = vhost_vdpa_virtqueue_cb; | 
|  | cb.private = vq; | 
|  | cb.trigger = vq->call_ctx.ctx; | 
|  | } else { | 
|  | cb.callback = NULL; | 
|  | cb.private = NULL; | 
|  | cb.trigger = NULL; | 
|  | } | 
|  | ops->set_vq_cb(vdpa, idx, &cb); | 
|  | vhost_vdpa_setup_vq_irq(v, idx); | 
|  | break; | 
|  |  | 
|  | case VHOST_SET_VRING_NUM: | 
|  | ops->set_vq_num(vdpa, idx, vq->num); | 
|  | break; | 
|  | } | 
|  |  | 
|  | return r; | 
|  | } | 
|  |  | 
|  | static long vhost_vdpa_unlocked_ioctl(struct file *filep, | 
|  | unsigned int cmd, unsigned long arg) | 
|  | { | 
|  | struct vhost_vdpa *v = filep->private_data; | 
|  | struct vhost_dev *d = &v->vdev; | 
|  | void __user *argp = (void __user *)arg; | 
|  | u64 __user *featurep = argp; | 
|  | u64 features; | 
|  | long r = 0; | 
|  |  | 
|  | if (cmd == VHOST_SET_BACKEND_FEATURES) { | 
|  | if (copy_from_user(&features, featurep, sizeof(features))) | 
|  | return -EFAULT; | 
|  | if (features & ~(VHOST_VDPA_BACKEND_FEATURES | | 
|  | BIT_ULL(VHOST_BACKEND_F_DESC_ASID) | | 
|  | BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) | | 
|  | BIT_ULL(VHOST_BACKEND_F_SUSPEND) | | 
|  | BIT_ULL(VHOST_BACKEND_F_RESUME) | | 
|  | BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))) | 
|  | return -EOPNOTSUPP; | 
|  | if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && | 
|  | !vhost_vdpa_can_suspend(v)) | 
|  | return -EOPNOTSUPP; | 
|  | if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) && | 
|  | !vhost_vdpa_can_resume(v)) | 
|  | return -EOPNOTSUPP; | 
|  | if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && | 
|  | !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) | 
|  | return -EINVAL; | 
|  | if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && | 
|  | !vhost_vdpa_has_desc_group(v)) | 
|  | return -EOPNOTSUPP; | 
|  | if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) && | 
|  | !vhost_vdpa_has_persistent_map(v)) | 
|  | return -EOPNOTSUPP; | 
|  | vhost_set_backend_features(&v->vdev, features); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | mutex_lock(&d->mutex); | 
|  |  | 
|  | switch (cmd) { | 
|  | case VHOST_VDPA_GET_DEVICE_ID: | 
|  | r = vhost_vdpa_get_device_id(v, argp); | 
|  | break; | 
|  | case VHOST_VDPA_GET_STATUS: | 
|  | r = vhost_vdpa_get_status(v, argp); | 
|  | break; | 
|  | case VHOST_VDPA_SET_STATUS: | 
|  | r = vhost_vdpa_set_status(v, argp); | 
|  | break; | 
|  | case VHOST_VDPA_GET_CONFIG: | 
|  | r = vhost_vdpa_get_config(v, argp); | 
|  | break; | 
|  | case VHOST_VDPA_SET_CONFIG: | 
|  | r = vhost_vdpa_set_config(v, argp); | 
|  | break; | 
|  | case VHOST_GET_FEATURES: | 
|  | r = vhost_vdpa_get_features(v, argp); | 
|  | break; | 
|  | case VHOST_SET_FEATURES: | 
|  | r = vhost_vdpa_set_features(v, argp); | 
|  | break; | 
|  | case VHOST_VDPA_GET_VRING_NUM: | 
|  | r = vhost_vdpa_get_vring_num(v, argp); | 
|  | break; | 
|  | case VHOST_VDPA_GET_GROUP_NUM: | 
|  | if (copy_to_user(argp, &v->vdpa->ngroups, | 
|  | sizeof(v->vdpa->ngroups))) | 
|  | r = -EFAULT; | 
|  | break; | 
|  | case VHOST_VDPA_GET_AS_NUM: | 
|  | if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas))) | 
|  | r = -EFAULT; | 
|  | break; | 
|  | case VHOST_SET_LOG_BASE: | 
|  | case VHOST_SET_LOG_FD: | 
|  | r = -ENOIOCTLCMD; | 
|  | break; | 
|  | case VHOST_VDPA_SET_CONFIG_CALL: | 
|  | r = vhost_vdpa_set_config_call(v, argp); | 
|  | break; | 
|  | case VHOST_GET_BACKEND_FEATURES: | 
|  | features = VHOST_VDPA_BACKEND_FEATURES; | 
|  | if (vhost_vdpa_can_suspend(v)) | 
|  | features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); | 
|  | if (vhost_vdpa_can_resume(v)) | 
|  | features |= BIT_ULL(VHOST_BACKEND_F_RESUME); | 
|  | if (vhost_vdpa_has_desc_group(v)) | 
|  | features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID); | 
|  | if (vhost_vdpa_has_persistent_map(v)) | 
|  | features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST); | 
|  | features |= vhost_vdpa_get_backend_features(v); | 
|  | if (copy_to_user(featurep, &features, sizeof(features))) | 
|  | r = -EFAULT; | 
|  | break; | 
|  | case VHOST_VDPA_GET_IOVA_RANGE: | 
|  | r = vhost_vdpa_get_iova_range(v, argp); | 
|  | break; | 
|  | case VHOST_VDPA_GET_CONFIG_SIZE: | 
|  | r = vhost_vdpa_get_config_size(v, argp); | 
|  | break; | 
|  | case VHOST_VDPA_GET_VQS_COUNT: | 
|  | r = vhost_vdpa_get_vqs_count(v, argp); | 
|  | break; | 
|  | case VHOST_VDPA_SUSPEND: | 
|  | r = vhost_vdpa_suspend(v); | 
|  | break; | 
|  | case VHOST_VDPA_RESUME: | 
|  | r = vhost_vdpa_resume(v); | 
|  | break; | 
|  | default: | 
|  | r = vhost_dev_ioctl(&v->vdev, cmd, argp); | 
|  | if (r == -ENOIOCTLCMD) | 
|  | r = vhost_vdpa_vring_ioctl(v, cmd, argp); | 
|  | break; | 
|  | } | 
|  |  | 
|  | if (r) | 
|  | goto out; | 
|  |  | 
|  | switch (cmd) { | 
|  | case VHOST_SET_OWNER: | 
|  | r = vhost_vdpa_bind_mm(v); | 
|  | if (r) | 
|  | vhost_dev_reset_owner(d, NULL); | 
|  | break; | 
|  | } | 
|  | out: | 
|  | mutex_unlock(&d->mutex); | 
|  | return r; | 
|  | } | 
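
/*
 * Remove one IOTLB entry from the device: parents doing their own DMA
 * translation via ->dma_map get a matching ->dma_unmap call, parents
 * using ->set_map are updated elsewhere with the whole IOTLB, and for
 * everything else the range is unmapped from the platform IOMMU domain.
 */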
static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
				     struct vhost_iotlb_map *map, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, asid, map->start, map->size);
	} else if (ops->set_map == NULL) {
		iommu_unmap(v->domain, map->start, map->size);
	}
}
|  |  | 
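/*
 * Unmap [start, last] from an address space backed by pinned user
 * pages: mark writable pages dirty, unpin them, fix up the mm
 * pinned-page accounting and drop the IOTLB entries.
 */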
|  | static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, | 
|  | u64 start, u64 last, u32 asid) | 
|  | { | 
|  | struct vhost_dev *dev = &v->vdev; | 
|  | struct vhost_iotlb_map *map; | 
|  | struct page *page; | 
|  | unsigned long pfn, pinned; | 
|  |  | 
|  | while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { | 
|  | pinned = PFN_DOWN(map->size); | 
|  | for (pfn = PFN_DOWN(map->addr); | 
|  | pinned > 0; pfn++, pinned--) { | 
|  | page = pfn_to_page(pfn); | 
|  | if (map->perm & VHOST_ACCESS_WO) | 
|  | set_page_dirty_lock(page); | 
|  | unpin_user_page(page); | 
|  | } | 
|  | atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm); | 
|  | vhost_vdpa_general_unmap(v, map, asid); | 
|  | vhost_iotlb_map_free(iotlb, map); | 
|  | } | 
|  | } | 
|  |  | 
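/*
 * Unmap [start, last] for a use_va parent: drop the file references
 * taken at map time instead of unpinning pages.
 */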
|  | static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, | 
|  | u64 start, u64 last, u32 asid) | 
|  | { | 
|  | struct vhost_iotlb_map *map; | 
|  | struct vdpa_map_file *map_file; | 
|  |  | 
|  | while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { | 
|  | map_file = (struct vdpa_map_file *)map->opaque; | 
|  | fput(map_file->file); | 
|  | kfree(map_file); | 
|  | vhost_vdpa_general_unmap(v, map, asid); | 
|  | vhost_iotlb_map_free(iotlb, map); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, | 
|  | struct vhost_iotlb *iotlb, u64 start, | 
|  | u64 last, u32 asid) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  |  | 
|  | if (vdpa->use_va) | 
|  | return vhost_vdpa_va_unmap(v, iotlb, start, last, asid); | 
|  |  | 
|  | return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid); | 
|  | } | 
|  |  | 
|  | static int perm_to_iommu_flags(u32 perm) | 
|  | { | 
|  | int flags = 0; | 
|  |  | 
|  | switch (perm) { | 
|  | case VHOST_ACCESS_WO: | 
|  | flags |= IOMMU_WRITE; | 
|  | break; | 
|  | case VHOST_ACCESS_RO: | 
|  | flags |= IOMMU_READ; | 
|  | break; | 
|  | case VHOST_ACCESS_RW: | 
|  | flags |= (IOMMU_WRITE | IOMMU_READ); | 
|  | break; | 
|  | default: | 
|  | WARN(1, "invalidate vhost IOTLB permission\n"); | 
|  | break; | 
|  | } | 
|  |  | 
|  | return flags | IOMMU_CACHE; | 
|  | } | 
|  |  | 
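/*
 * Insert a mapping into the IOTLB and propagate it to the device:
 * through ->dma_map, through ->set_map (deferred to the end of an
 * IOTLB batch), or by mapping it into the platform IOMMU domain.
 * Pinned pages are accounted against mm->pinned_vm for PA mappings.
 */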
|  | static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, | 
|  | u64 iova, u64 size, u64 pa, u32 perm, void *opaque) | 
|  | { | 
|  | struct vhost_dev *dev = &v->vdev; | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | u32 asid = iotlb_to_asid(iotlb); | 
|  | int r = 0; | 
|  |  | 
|  | r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1, | 
|  | pa, perm, opaque); | 
|  | if (r) | 
|  | return r; | 
|  |  | 
|  | if (ops->dma_map) { | 
|  | r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque); | 
|  | } else if (ops->set_map) { | 
|  | if (!v->in_batch) | 
|  | r = ops->set_map(vdpa, asid, iotlb); | 
|  | } else { | 
|  | r = iommu_map(v->domain, iova, pa, size, | 
|  | perm_to_iommu_flags(perm), GFP_KERNEL); | 
|  | } | 
|  | if (r) { | 
|  | vhost_iotlb_del_range(iotlb, iova, iova + size - 1); | 
|  | return r; | 
|  | } | 
|  |  | 
|  | if (!vdpa->use_va) | 
|  | atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void vhost_vdpa_unmap(struct vhost_vdpa *v, | 
|  | struct vhost_iotlb *iotlb, | 
|  | u64 iova, u64 size) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | u32 asid = iotlb_to_asid(iotlb); | 
|  |  | 
|  | vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid); | 
|  |  | 
|  | if (ops->set_map) { | 
|  | if (!v->in_batch) | 
|  | ops->set_map(vdpa, asid, iotlb); | 
|  | } | 
}
|  |  | 
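/*
 * Map a guest IOVA range onto userspace virtual addresses (use_va
 * parents). The range is split along VMAs; only shared, file-backed
 * VMAs without VM_IO/VM_PFNMAP are mapped, other ranges are silently
 * skipped, and a reference to the backing file plus its offset is
 * recorded for each mapped piece.
 */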
|  | static int vhost_vdpa_va_map(struct vhost_vdpa *v, | 
|  | struct vhost_iotlb *iotlb, | 
|  | u64 iova, u64 size, u64 uaddr, u32 perm) | 
|  | { | 
|  | struct vhost_dev *dev = &v->vdev; | 
|  | u64 offset, map_size, map_iova = iova; | 
|  | struct vdpa_map_file *map_file; | 
|  | struct vm_area_struct *vma; | 
|  | int ret = 0; | 
|  |  | 
|  | mmap_read_lock(dev->mm); | 
|  |  | 
|  | while (size) { | 
|  | vma = find_vma(dev->mm, uaddr); | 
|  | if (!vma) { | 
|  | ret = -EINVAL; | 
|  | break; | 
|  | } | 
|  | map_size = min(size, vma->vm_end - uaddr); | 
|  | if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) && | 
|  | !(vma->vm_flags & (VM_IO | VM_PFNMAP)))) | 
|  | goto next; | 
|  |  | 
|  | map_file = kzalloc(sizeof(*map_file), GFP_KERNEL); | 
|  | if (!map_file) { | 
|  | ret = -ENOMEM; | 
|  | break; | 
|  | } | 
|  | offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; | 
|  | map_file->offset = offset; | 
|  | map_file->file = get_file(vma->vm_file); | 
|  | ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr, | 
|  | perm, map_file); | 
|  | if (ret) { | 
|  | fput(map_file->file); | 
|  | kfree(map_file); | 
|  | break; | 
|  | } | 
|  | next: | 
|  | size -= map_size; | 
|  | uaddr += map_size; | 
|  | map_iova += map_size; | 
|  | } | 
|  | if (ret) | 
|  | vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova); | 
|  |  | 
|  | mmap_read_unlock(dev->mm); | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
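/*
 * Map a guest IOVA range by pinning the backing user pages
 * (pin_user_pages() with FOLL_LONGTERM) and mapping physically
 * contiguous runs as single entries. The number of pages is checked
 * against RLIMIT_MEMLOCK before pinning.
 */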
|  | static int vhost_vdpa_pa_map(struct vhost_vdpa *v, | 
|  | struct vhost_iotlb *iotlb, | 
|  | u64 iova, u64 size, u64 uaddr, u32 perm) | 
|  | { | 
|  | struct vhost_dev *dev = &v->vdev; | 
|  | struct page **page_list; | 
|  | unsigned long list_size = PAGE_SIZE / sizeof(struct page *); | 
|  | unsigned int gup_flags = FOLL_LONGTERM; | 
|  | unsigned long npages, cur_base, map_pfn, last_pfn = 0; | 
|  | unsigned long lock_limit, sz2pin, nchunks, i; | 
|  | u64 start = iova; | 
|  | long pinned; | 
|  | int ret = 0; | 
|  |  | 
|  | /* Limit the use of memory for bookkeeping */ | 
|  | page_list = (struct page **) __get_free_page(GFP_KERNEL); | 
|  | if (!page_list) | 
|  | return -ENOMEM; | 
|  |  | 
|  | if (perm & VHOST_ACCESS_WO) | 
|  | gup_flags |= FOLL_WRITE; | 
|  |  | 
|  | npages = PFN_UP(size + (iova & ~PAGE_MASK)); | 
|  | if (!npages) { | 
|  | ret = -EINVAL; | 
|  | goto free; | 
|  | } | 
|  |  | 
|  | mmap_read_lock(dev->mm); | 
|  |  | 
|  | lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK)); | 
|  | if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { | 
|  | ret = -ENOMEM; | 
|  | goto unlock; | 
|  | } | 
|  |  | 
|  | cur_base = uaddr & PAGE_MASK; | 
|  | iova &= PAGE_MASK; | 
|  | nchunks = 0; | 
|  |  | 
|  | while (npages) { | 
|  | sz2pin = min_t(unsigned long, npages, list_size); | 
|  | pinned = pin_user_pages(cur_base, sz2pin, | 
|  | gup_flags, page_list); | 
|  | if (sz2pin != pinned) { | 
|  | if (pinned < 0) { | 
|  | ret = pinned; | 
|  | } else { | 
|  | unpin_user_pages(page_list, pinned); | 
|  | ret = -ENOMEM; | 
|  | } | 
|  | goto out; | 
|  | } | 
|  | nchunks++; | 
|  |  | 
|  | if (!last_pfn) | 
|  | map_pfn = page_to_pfn(page_list[0]); | 
|  |  | 
|  | for (i = 0; i < pinned; i++) { | 
|  | unsigned long this_pfn = page_to_pfn(page_list[i]); | 
|  | u64 csize; | 
|  |  | 
|  | if (last_pfn && (this_pfn != last_pfn + 1)) { | 
				/* Map the preceding contiguous chunk of pinned memory */
|  | csize = PFN_PHYS(last_pfn - map_pfn + 1); | 
|  | ret = vhost_vdpa_map(v, iotlb, iova, csize, | 
|  | PFN_PHYS(map_pfn), | 
|  | perm, NULL); | 
|  | if (ret) { | 
|  | /* | 
|  | * Unpin the pages that are left unmapped | 
|  | * from this point on in the current | 
|  | * page_list. The remaining outstanding | 
|  | * ones which may stride across several | 
|  | * chunks will be covered in the common | 
|  | * error path subsequently. | 
|  | */ | 
|  | unpin_user_pages(&page_list[i], | 
|  | pinned - i); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | map_pfn = this_pfn; | 
|  | iova += csize; | 
|  | nchunks = 0; | 
|  | } | 
|  |  | 
|  | last_pfn = this_pfn; | 
|  | } | 
|  |  | 
|  | cur_base += PFN_PHYS(pinned); | 
|  | npages -= pinned; | 
|  | } | 
|  |  | 
	/* Map the last contiguous chunk of pinned pages */
|  | ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1), | 
|  | PFN_PHYS(map_pfn), perm, NULL); | 
|  | out: | 
|  | if (ret) { | 
|  | if (nchunks) { | 
|  | unsigned long pfn; | 
|  |  | 
			/*
			 * Unpin the outstanding pages that were pinned but not
			 * yet mapped because vhost_vdpa_map() or
			 * pin_user_pages() failed.
			 *
			 * Pages that were already mapped are accounted in
			 * vhost_vdpa_map(), hence their unpinning is handled
			 * by vhost_vdpa_unmap() below.
			 */
|  | WARN_ON(!last_pfn); | 
|  | for (pfn = map_pfn; pfn <= last_pfn; pfn++) | 
|  | unpin_user_page(pfn_to_page(pfn)); | 
|  | } | 
|  | vhost_vdpa_unmap(v, iotlb, start, size); | 
|  | } | 
|  | unlock: | 
|  | mmap_read_unlock(dev->mm); | 
|  | free: | 
|  | free_page((unsigned long)page_list); | 
|  | return ret; | 
}
|  |  | 
|  | static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, | 
|  | struct vhost_iotlb *iotlb, | 
|  | struct vhost_iotlb_msg *msg) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  |  | 
|  | if (msg->iova < v->range.first || !msg->size || | 
|  | msg->iova > U64_MAX - msg->size + 1 || | 
|  | msg->iova + msg->size - 1 > v->range.last) | 
|  | return -EINVAL; | 
|  |  | 
|  | if (vhost_iotlb_itree_first(iotlb, msg->iova, | 
|  | msg->iova + msg->size - 1)) | 
|  | return -EEXIST; | 
|  |  | 
|  | if (vdpa->use_va) | 
|  | return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size, | 
|  | msg->uaddr, msg->perm); | 
|  |  | 
|  | return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr, | 
|  | msg->perm); | 
|  | } | 
|  |  | 
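/*
 * Dispatch an IOTLB message from the vhost char device for address
 * space @asid: UPDATE adds a mapping, INVALIDATE removes a range, and
 * BATCH_BEGIN/BATCH_END bracket a batch so that ->set_map parents are
 * updated only once per batch.
 */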
|  | static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid, | 
|  | struct vhost_iotlb_msg *msg) | 
|  | { | 
|  | struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | struct vhost_iotlb *iotlb = NULL; | 
|  | struct vhost_vdpa_as *as = NULL; | 
|  | int r = 0; | 
|  |  | 
|  | mutex_lock(&dev->mutex); | 
|  |  | 
|  | r = vhost_dev_check_owner(dev); | 
|  | if (r) | 
|  | goto unlock; | 
|  |  | 
|  | if (msg->type == VHOST_IOTLB_UPDATE || | 
|  | msg->type == VHOST_IOTLB_BATCH_BEGIN) { | 
|  | as = vhost_vdpa_find_alloc_as(v, asid); | 
|  | if (!as) { | 
|  | dev_err(&v->dev, "can't find and alloc asid %d\n", | 
|  | asid); | 
|  | r = -EINVAL; | 
|  | goto unlock; | 
|  | } | 
|  | iotlb = &as->iotlb; | 
|  | } else | 
|  | iotlb = asid_to_iotlb(v, asid); | 
|  |  | 
|  | if ((v->in_batch && v->batch_asid != asid) || !iotlb) { | 
|  | if (v->in_batch && v->batch_asid != asid) { | 
|  | dev_info(&v->dev, "batch id %d asid %d\n", | 
|  | v->batch_asid, asid); | 
|  | } | 
|  | if (!iotlb) | 
|  | dev_err(&v->dev, "no iotlb for asid %d\n", asid); | 
|  | r = -EINVAL; | 
|  | goto unlock; | 
|  | } | 
|  |  | 
|  | switch (msg->type) { | 
|  | case VHOST_IOTLB_UPDATE: | 
|  | r = vhost_vdpa_process_iotlb_update(v, iotlb, msg); | 
|  | break; | 
|  | case VHOST_IOTLB_INVALIDATE: | 
|  | vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size); | 
|  | break; | 
|  | case VHOST_IOTLB_BATCH_BEGIN: | 
|  | v->batch_asid = asid; | 
|  | v->in_batch = true; | 
|  | break; | 
|  | case VHOST_IOTLB_BATCH_END: | 
|  | if (v->in_batch && ops->set_map) | 
|  | ops->set_map(vdpa, asid, iotlb); | 
|  | v->in_batch = false; | 
|  | break; | 
|  | default: | 
|  | r = -EINVAL; | 
|  | break; | 
|  | } | 
|  | unlock: | 
|  | mutex_unlock(&dev->mutex); | 
|  |  | 
|  | return r; | 
|  | } | 
|  |  | 
|  | static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb, | 
|  | struct iov_iter *from) | 
|  | { | 
|  | struct file *file = iocb->ki_filp; | 
|  | struct vhost_vdpa *v = file->private_data; | 
|  | struct vhost_dev *dev = &v->vdev; | 
|  |  | 
|  | return vhost_chr_write_iter(dev, from); | 
|  | } | 
|  |  | 
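/*
 * For parents that rely on the platform IOMMU (no ->set_map or
 * ->dma_map), allocate an IOMMU domain on the DMA device and attach
 * it so the mappings can be managed directly by this driver.
 */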
|  | static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | struct device *dma_dev = vdpa_get_dma_dev(vdpa); | 
|  | const struct bus_type *bus; | 
|  | int ret; | 
|  |  | 
	/* The device wants to do DMA by itself */
|  | if (ops->set_map || ops->dma_map) | 
|  | return 0; | 
|  |  | 
|  | bus = dma_dev->bus; | 
|  | if (!bus) | 
|  | return -EFAULT; | 
|  |  | 
|  | if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) { | 
|  | dev_warn_once(&v->dev, | 
|  | "Failed to allocate domain, device is not IOMMU cache coherent capable\n"); | 
|  | return -ENOTSUPP; | 
|  | } | 
|  |  | 
|  | v->domain = iommu_domain_alloc(bus); | 
|  | if (!v->domain) | 
|  | return -EIO; | 
|  |  | 
|  | ret = iommu_attach_device(v->domain, dma_dev); | 
|  | if (ret) | 
|  | goto err_attach; | 
|  |  | 
|  | return 0; | 
|  |  | 
|  | err_attach: | 
|  | iommu_domain_free(v->domain); | 
|  | v->domain = NULL; | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static void vhost_vdpa_free_domain(struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | struct device *dma_dev = vdpa_get_dma_dev(vdpa); | 
|  |  | 
|  | if (v->domain) { | 
|  | iommu_detach_device(v->domain, dma_dev); | 
|  | iommu_domain_free(v->domain); | 
|  | } | 
|  |  | 
|  | v->domain = NULL; | 
|  | } | 
|  |  | 
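/*
 * Determine the usable IOVA range: prefer the parent's
 * ->get_iova_range, fall back to the IOMMU aperture, and otherwise
 * allow the whole 64-bit space.
 */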
|  | static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v) | 
|  | { | 
|  | struct vdpa_iova_range *range = &v->range; | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  |  | 
|  | if (ops->get_iova_range) { | 
|  | *range = ops->get_iova_range(vdpa); | 
|  | } else if (v->domain && v->domain->geometry.force_aperture) { | 
|  | range->first = v->domain->geometry.aperture_start; | 
|  | range->last = v->domain->geometry.aperture_end; | 
|  | } else { | 
|  | range->first = 0; | 
|  | range->last = ULLONG_MAX; | 
|  | } | 
|  | } | 
|  |  | 
|  | static void vhost_vdpa_cleanup(struct vhost_vdpa *v) | 
|  | { | 
|  | struct vhost_vdpa_as *as; | 
|  | u32 asid; | 
|  |  | 
|  | for (asid = 0; asid < v->vdpa->nas; asid++) { | 
|  | as = asid_to_as(v, asid); | 
|  | if (as) | 
|  | vhost_vdpa_remove_as(v, asid); | 
|  | } | 
|  |  | 
|  | vhost_vdpa_free_domain(v); | 
|  | vhost_dev_cleanup(&v->vdev); | 
|  | kfree(v->vdev.vqs); | 
|  | v->vdev.vqs = NULL; | 
|  | } | 
|  |  | 
|  | static int vhost_vdpa_open(struct inode *inode, struct file *filep) | 
|  | { | 
|  | struct vhost_vdpa *v; | 
|  | struct vhost_dev *dev; | 
|  | struct vhost_virtqueue **vqs; | 
|  | int r, opened; | 
|  | u32 i, nvqs; | 
|  |  | 
|  | v = container_of(inode->i_cdev, struct vhost_vdpa, cdev); | 
|  |  | 
|  | opened = atomic_cmpxchg(&v->opened, 0, 1); | 
|  | if (opened) | 
|  | return -EBUSY; | 
|  |  | 
|  | nvqs = v->nvqs; | 
|  | r = vhost_vdpa_reset(v); | 
|  | if (r) | 
|  | goto err; | 
|  |  | 
|  | vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL); | 
|  | if (!vqs) { | 
|  | r = -ENOMEM; | 
|  | goto err; | 
|  | } | 
|  |  | 
|  | dev = &v->vdev; | 
|  | for (i = 0; i < nvqs; i++) { | 
|  | vqs[i] = &v->vqs[i]; | 
|  | vqs[i]->handle_kick = handle_vq_kick; | 
|  | } | 
|  | vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, | 
|  | vhost_vdpa_process_iotlb_msg); | 
|  |  | 
|  | r = vhost_vdpa_alloc_domain(v); | 
|  | if (r) | 
|  | goto err_alloc_domain; | 
|  |  | 
|  | vhost_vdpa_set_iova_range(v); | 
|  |  | 
|  | filep->private_data = v; | 
|  |  | 
|  | return 0; | 
|  |  | 
|  | err_alloc_domain: | 
|  | vhost_vdpa_cleanup(v); | 
|  | err: | 
|  | atomic_dec(&v->opened); | 
|  | return r; | 
|  | } | 
|  |  | 
|  | static void vhost_vdpa_clean_irq(struct vhost_vdpa *v) | 
|  | { | 
|  | u32 i; | 
|  |  | 
|  | for (i = 0; i < v->nvqs; i++) | 
|  | vhost_vdpa_unsetup_vq_irq(v, i); | 
|  | } | 
|  |  | 
|  | static int vhost_vdpa_release(struct inode *inode, struct file *filep) | 
|  | { | 
|  | struct vhost_vdpa *v = filep->private_data; | 
|  | struct vhost_dev *d = &v->vdev; | 
|  |  | 
|  | mutex_lock(&d->mutex); | 
|  | filep->private_data = NULL; | 
|  | vhost_vdpa_clean_irq(v); | 
|  | vhost_vdpa_reset(v); | 
|  | vhost_dev_stop(&v->vdev); | 
|  | vhost_vdpa_unbind_mm(v); | 
|  | vhost_vdpa_config_put(v); | 
|  | vhost_vdpa_cleanup(v); | 
|  | mutex_unlock(&d->mutex); | 
|  |  | 
|  | atomic_dec(&v->opened); | 
|  | complete(&v->completion); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | #ifdef CONFIG_MMU | 
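/*
 * Fault handler for the doorbell mapping: remap the virtqueue's
 * notification page into the faulting VMA on first access.
 */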
|  | static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) | 
|  | { | 
|  | struct vhost_vdpa *v = vmf->vma->vm_file->private_data; | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | struct vdpa_notification_area notify; | 
|  | struct vm_area_struct *vma = vmf->vma; | 
|  | u16 index = vma->vm_pgoff; | 
|  |  | 
|  | notify = ops->get_vq_notification(vdpa, index); | 
|  |  | 
|  | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | 
|  | if (remap_pfn_range(vma, vmf->address & PAGE_MASK, | 
|  | PFN_DOWN(notify.addr), PAGE_SIZE, | 
|  | vma->vm_page_prot)) | 
|  | return VM_FAULT_SIGBUS; | 
|  |  | 
|  | return VM_FAULT_NOPAGE; | 
|  | } | 
|  |  | 
|  | static const struct vm_operations_struct vhost_vdpa_vm_ops = { | 
|  | .fault = vhost_vdpa_fault, | 
|  | }; | 
|  |  | 
|  | static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) | 
|  | { | 
|  | struct vhost_vdpa *v = vma->vm_file->private_data; | 
|  | struct vdpa_device *vdpa = v->vdpa; | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | struct vdpa_notification_area notify; | 
|  | unsigned long index = vma->vm_pgoff; | 
|  |  | 
|  | if (vma->vm_end - vma->vm_start != PAGE_SIZE) | 
|  | return -EINVAL; | 
|  | if ((vma->vm_flags & VM_SHARED) == 0) | 
|  | return -EINVAL; | 
|  | if (vma->vm_flags & VM_READ) | 
|  | return -EINVAL; | 
|  | if (index > 65535) | 
|  | return -EINVAL; | 
|  | if (!ops->get_vq_notification) | 
|  | return -ENOTSUPP; | 
|  |  | 
	/* To be safe and easily modelled by userspace, we only support a
	 * doorbell that sits on a page boundary and does not share its
	 * page with other registers.
	 */
|  | notify = ops->get_vq_notification(vdpa, index); | 
|  | if (notify.addr & (PAGE_SIZE - 1)) | 
|  | return -EINVAL; | 
|  | if (vma->vm_end - vma->vm_start != notify.size) | 
|  | return -ENOTSUPP; | 
|  |  | 
|  | vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); | 
|  | vma->vm_ops = &vhost_vdpa_vm_ops; | 
|  | return 0; | 
|  | } | 
|  | #endif /* CONFIG_MMU */ | 
|  |  | 
|  | static const struct file_operations vhost_vdpa_fops = { | 
|  | .owner		= THIS_MODULE, | 
|  | .open		= vhost_vdpa_open, | 
|  | .release	= vhost_vdpa_release, | 
|  | .write_iter	= vhost_vdpa_chr_write_iter, | 
|  | .unlocked_ioctl	= vhost_vdpa_unlocked_ioctl, | 
|  | #ifdef CONFIG_MMU | 
|  | .mmap		= vhost_vdpa_mmap, | 
|  | #endif /* CONFIG_MMU */ | 
|  | .compat_ioctl	= compat_ptr_ioctl, | 
|  | }; | 
|  |  | 
|  | static void vhost_vdpa_release_dev(struct device *device) | 
|  | { | 
|  | struct vhost_vdpa *v = | 
|  | container_of(device, struct vhost_vdpa, dev); | 
|  |  | 
|  | ida_simple_remove(&vhost_vdpa_ida, v->minor); | 
|  | kfree(v->vqs); | 
|  | kfree(v); | 
|  | } | 
|  |  | 
|  | static int vhost_vdpa_probe(struct vdpa_device *vdpa) | 
|  | { | 
|  | const struct vdpa_config_ops *ops = vdpa->config; | 
|  | struct vhost_vdpa *v; | 
|  | int minor; | 
|  | int i, r; | 
|  |  | 
	/* We can't support a platform-IOMMU device with more than one
	 * group or address space.
	 */
|  | if (!ops->set_map && !ops->dma_map && | 
|  | (vdpa->ngroups > 1 || vdpa->nas > 1)) | 
|  | return -EOPNOTSUPP; | 
|  |  | 
|  | v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); | 
|  | if (!v) | 
|  | return -ENOMEM; | 
|  |  | 
|  | minor = ida_simple_get(&vhost_vdpa_ida, 0, | 
|  | VHOST_VDPA_DEV_MAX, GFP_KERNEL); | 
|  | if (minor < 0) { | 
|  | kfree(v); | 
|  | return minor; | 
|  | } | 
|  |  | 
|  | atomic_set(&v->opened, 0); | 
|  | v->minor = minor; | 
|  | v->vdpa = vdpa; | 
|  | v->nvqs = vdpa->nvqs; | 
|  | v->virtio_id = ops->get_device_id(vdpa); | 
|  |  | 
|  | device_initialize(&v->dev); | 
|  | v->dev.release = vhost_vdpa_release_dev; | 
|  | v->dev.parent = &vdpa->dev; | 
|  | v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor); | 
|  | v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue), | 
|  | GFP_KERNEL); | 
|  | if (!v->vqs) { | 
|  | r = -ENOMEM; | 
|  | goto err; | 
|  | } | 
|  |  | 
|  | r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor); | 
|  | if (r) | 
|  | goto err; | 
|  |  | 
|  | cdev_init(&v->cdev, &vhost_vdpa_fops); | 
|  | v->cdev.owner = THIS_MODULE; | 
|  |  | 
|  | r = cdev_device_add(&v->cdev, &v->dev); | 
|  | if (r) | 
|  | goto err; | 
|  |  | 
|  | init_completion(&v->completion); | 
|  | vdpa_set_drvdata(vdpa, v); | 
|  |  | 
|  | for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++) | 
|  | INIT_HLIST_HEAD(&v->as[i]); | 
|  |  | 
|  | return 0; | 
|  |  | 
|  | err: | 
|  | put_device(&v->dev); | 
|  | ida_simple_remove(&vhost_vdpa_ida, v->minor); | 
|  | return r; | 
|  | } | 
|  |  | 
|  | static void vhost_vdpa_remove(struct vdpa_device *vdpa) | 
|  | { | 
|  | struct vhost_vdpa *v = vdpa_get_drvdata(vdpa); | 
|  | int opened; | 
|  |  | 
|  | cdev_device_del(&v->cdev, &v->dev); | 
|  |  | 
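	/*
	 * Wait until the file has been released: atomically claim the
	 * "opened" slot, and if it is still held, wait for the holder to
	 * signal v->completion from vhost_vdpa_release().
	 */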
|  | do { | 
|  | opened = atomic_cmpxchg(&v->opened, 0, 1); | 
|  | if (!opened) | 
|  | break; | 
|  | wait_for_completion(&v->completion); | 
|  | } while (1); | 
|  |  | 
|  | put_device(&v->dev); | 
|  | } | 
|  |  | 
|  | static struct vdpa_driver vhost_vdpa_driver = { | 
|  | .driver = { | 
|  | .name	= "vhost_vdpa", | 
|  | }, | 
|  | .probe	= vhost_vdpa_probe, | 
|  | .remove	= vhost_vdpa_remove, | 
|  | }; | 
|  |  | 
|  | static int __init vhost_vdpa_init(void) | 
|  | { | 
|  | int r; | 
|  |  | 
|  | r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX, | 
|  | "vhost-vdpa"); | 
|  | if (r) | 
|  | goto err_alloc_chrdev; | 
|  |  | 
|  | r = vdpa_register_driver(&vhost_vdpa_driver); | 
|  | if (r) | 
|  | goto err_vdpa_register_driver; | 
|  |  | 
|  | return 0; | 
|  |  | 
|  | err_vdpa_register_driver: | 
|  | unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX); | 
|  | err_alloc_chrdev: | 
|  | return r; | 
|  | } | 
|  | module_init(vhost_vdpa_init); | 
|  |  | 
|  | static void __exit vhost_vdpa_exit(void) | 
|  | { | 
|  | vdpa_unregister_driver(&vhost_vdpa_driver); | 
|  | unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX); | 
|  | } | 
|  | module_exit(vhost_vdpa_exit); | 
|  |  | 
|  | MODULE_VERSION("0.0.1"); | 
|  | MODULE_LICENSE("GPL v2"); | 
|  | MODULE_AUTHOR("Intel Corporation"); | 
|  | MODULE_DESCRIPTION("vDPA-based vhost backend for virtio"); |