| From foo@baz Sat Jan 26 10:22:50 CET 2019 |
| From: Jason Wang <jasowang@redhat.com> |
| Date: Wed, 16 Jan 2019 16:54:42 +0800 |
| Subject: vhost: log dirty page correctly |
| |
| From: Jason Wang <jasowang@redhat.com> |
| |
| [ Upstream commit cc5e710759470bc7f3c61d11fd54586f15fdbdf4 ] |
| |
| Vhost dirty page logging API is designed to sync through GPA. But we |
| try to log GIOVA when device IOTLB is enabled. This is wrong and may |
| lead to missing data after migration. |
| |
| To solve this issue, when logging with device IOTLB enabled, we will: |
| |
| 1) reuse the device IOTLB translation result of GIOVA->HVA mapping to |
| get HVA, for writable descriptor, get HVA through iovec. For used |
| ring update, translate its GIOVA to HVA |
| 2) traverse the GPA->HVA mapping to get the possible GPA and log |
| through GPA. Note that this reverse mapping is not guaranteed |
| to be unique, so we should log each possible GPA in this case. |
| |
| This fixes the failure of scp to guest during migration. In -next, we |
| will probably support passing GIOVA->GPA instead of GIOVA->HVA. |
| |
| Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API") |
| Reported-by: Jintack Lim <jintack@cs.columbia.edu> |
| Cc: Jintack Lim <jintack@cs.columbia.edu> |
| Signed-off-by: Jason Wang <jasowang@redhat.com> |
| Acked-by: Michael S. Tsirkin <mst@redhat.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| drivers/vhost/net.c | 3 + |
| drivers/vhost/vhost.c | 97 ++++++++++++++++++++++++++++++++++++++++++-------- |
| drivers/vhost/vhost.h | 3 + |
| 3 files changed, 87 insertions(+), 16 deletions(-) |
| |
| --- a/drivers/vhost/net.c |
| +++ b/drivers/vhost/net.c |
| @@ -1192,7 +1192,8 @@ static void handle_rx(struct vhost_net * |
| if (nvq->done_idx > VHOST_NET_BATCH) |
| vhost_net_signal_used(nvq); |
| if (unlikely(vq_log)) |
| - vhost_log_write(vq, vq_log, log, vhost_len); |
| + vhost_log_write(vq, vq_log, log, vhost_len, |
| + vq->iov, in); |
| total_len += vhost_len; |
| if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) { |
| vhost_poll_queue(&vq->poll); |
| --- a/drivers/vhost/vhost.c |
| +++ b/drivers/vhost/vhost.c |
| @@ -1733,13 +1733,87 @@ static int log_write(void __user *log_ba |
| return r; |
| } |
| |
| +static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) |
| +{ |
| + struct vhost_umem *umem = vq->umem; |
| + struct vhost_umem_node *u; |
| + u64 start, end, l, min; |
| + int r; |
| + bool hit = false; |
| + |
| + while (len) { |
| + min = len; |
| + /* More than one GPAs can be mapped into a single HVA. So |
| + * iterate all possible umems here to be safe. |
| + */ |
| + list_for_each_entry(u, &umem->umem_list, link) { |
| + if (u->userspace_addr > hva - 1 + len || |
| + u->userspace_addr - 1 + u->size < hva) |
| + continue; |
| + start = max(u->userspace_addr, hva); |
| + end = min(u->userspace_addr - 1 + u->size, |
| + hva - 1 + len); |
| + l = end - start + 1; |
| + r = log_write(vq->log_base, |
| + u->start + start - u->userspace_addr, |
| + l); |
| + if (r < 0) |
| + return r; |
| + hit = true; |
| + min = min(l, min); |
| + } |
| + |
| + if (!hit) |
| + return -EFAULT; |
| + |
| + len -= min; |
| + hva += min; |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) |
| +{ |
| + struct iovec iov[64]; |
| + int i, ret; |
| + |
| + if (!vq->iotlb) |
| + return log_write(vq->log_base, vq->log_addr + used_offset, len); |
| + |
| + ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, |
| + len, iov, 64, VHOST_ACCESS_WO); |
| + if (ret) |
| + return ret; |
| + |
| + for (i = 0; i < ret; i++) { |
| + ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, |
| + iov[i].iov_len); |
| + if (ret) |
| + return ret; |
| + } |
| + |
| + return 0; |
| +} |
| + |
| int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, |
| - unsigned int log_num, u64 len) |
| + unsigned int log_num, u64 len, struct iovec *iov, int count) |
| { |
| int i, r; |
| |
| /* Make sure data written is seen before log. */ |
| smp_wmb(); |
| + |
| + if (vq->iotlb) { |
| + for (i = 0; i < count; i++) { |
| + r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, |
| + iov[i].iov_len); |
| + if (r < 0) |
| + return r; |
| + } |
| + return 0; |
| + } |
| + |
| for (i = 0; i < log_num; ++i) { |
| u64 l = min(log[i].len, len); |
| r = log_write(vq->log_base, log[i].addr, l); |
| @@ -1769,9 +1843,8 @@ static int vhost_update_used_flags(struc |
| smp_wmb(); |
| /* Log used flag write. */ |
| used = &vq->used->flags; |
| - log_write(vq->log_base, vq->log_addr + |
| - (used - (void __user *)vq->used), |
| - sizeof vq->used->flags); |
| + log_used(vq, (used - (void __user *)vq->used), |
| + sizeof vq->used->flags); |
| if (vq->log_ctx) |
| eventfd_signal(vq->log_ctx, 1); |
| } |
| @@ -1789,9 +1862,8 @@ static int vhost_update_avail_event(stru |
| smp_wmb(); |
| /* Log avail event write */ |
| used = vhost_avail_event(vq); |
| - log_write(vq->log_base, vq->log_addr + |
| - (used - (void __user *)vq->used), |
| - sizeof *vhost_avail_event(vq)); |
| + log_used(vq, (used - (void __user *)vq->used), |
| + sizeof *vhost_avail_event(vq)); |
| if (vq->log_ctx) |
| eventfd_signal(vq->log_ctx, 1); |
| } |
| @@ -2191,10 +2263,8 @@ static int __vhost_add_used_n(struct vho |
| /* Make sure data is seen before log. */ |
| smp_wmb(); |
| /* Log used ring entry write. */ |
| - log_write(vq->log_base, |
| - vq->log_addr + |
| - ((void __user *)used - (void __user *)vq->used), |
| - count * sizeof *used); |
| + log_used(vq, ((void __user *)used - (void __user *)vq->used), |
| + count * sizeof *used); |
| } |
| old = vq->last_used_idx; |
| new = (vq->last_used_idx += count); |
| @@ -2236,9 +2306,8 @@ int vhost_add_used_n(struct vhost_virtqu |
| /* Make sure used idx is seen before log. */ |
| smp_wmb(); |
| /* Log used index update. */ |
| - log_write(vq->log_base, |
| - vq->log_addr + offsetof(struct vring_used, idx), |
| - sizeof vq->used->idx); |
| + log_used(vq, offsetof(struct vring_used, idx), |
| + sizeof vq->used->idx); |
| if (vq->log_ctx) |
| eventfd_signal(vq->log_ctx, 1); |
| } |
| --- a/drivers/vhost/vhost.h |
| +++ b/drivers/vhost/vhost.h |
| @@ -205,7 +205,8 @@ bool vhost_vq_avail_empty(struct vhost_d |
| bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); |
| |
| int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, |
| - unsigned int log_num, u64 len); |
| + unsigned int log_num, u64 len, |
| + struct iovec *iov, int count); |
| int vq_iotlb_prefetch(struct vhost_virtqueue *vq); |
| |
| struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); |