| From cf0d53ba4947aad6e471491d5b20a567cbe92e56 Mon Sep 17 00:00:00 2001 |
| From: Alex Williamson <alex.williamson@redhat.com> |
| Date: Mon, 2 Oct 2017 12:39:10 -0600 |
| Subject: vfio/pci: Virtualize Maximum Read Request Size |
| |
| From: Alex Williamson <alex.williamson@redhat.com> |
| |
| commit cf0d53ba4947aad6e471491d5b20a567cbe92e56 upstream. |
| |
| MRRS defines the maximum read request size a device is allowed to |
| make. Drivers will often increase this to allow more data transfer |
| with a single request. Completions to this request are bound by the |
| MPS setting for the bus. Aside from device quirks (none known), it |
| doesn't seem to make sense to set an MRRS value less than MPS, yet |
| this is a likely scenario given that user drivers do not have a |
| system-wide view of the PCI topology. Virtualize MRRS such that the |
| user can set MRRS >= MPS, but use MPS as the floor value that we'll |
| write to hardware. |
| |
| Signed-off-by: Alex Williamson <alex.williamson@redhat.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| drivers/vfio/pci/vfio_pci_config.c | 29 ++++++++++++++++++++++++++--- |
| 1 file changed, 26 insertions(+), 3 deletions(-) |
| |
| --- a/drivers/vfio/pci/vfio_pci_config.c |
| +++ b/drivers/vfio/pci/vfio_pci_config.c |
| @@ -810,6 +810,7 @@ static int vfio_exp_config_write(struct |
| { |
| __le16 *ctrl = (__le16 *)(vdev->vconfig + pos - |
| offset + PCI_EXP_DEVCTL); |
| + int readrq = le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ; |
| |
| count = vfio_default_config_write(vdev, pos, count, perm, offset, val); |
| if (count < 0) |
| @@ -835,6 +836,27 @@ static int vfio_exp_config_write(struct |
| pci_try_reset_function(vdev->pdev); |
| } |
| |
| + /* |
| + * MPS is virtualized to the user, writes do not change the physical |
| + * register since determining a proper MPS value requires a system wide |
| + * device view. The MRRS is largely independent of MPS, but since the |
| + * user does not have that system-wide view, they might set a safe, but |
| + * inefficiently low value. Here we allow writes through to hardware, |
| + * but we set the floor to the physical device MPS setting, so that |
| + * we can at least use full TLPs, as defined by the MPS value. |
| + * |
| + * NB, if any devices actually depend on an artificially low MRRS |
| + * setting, this will need to be revisited, perhaps with a quirk |
| + * though pcie_set_readrq(). |
| + */ |
| + if (readrq != (le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ)) { |
| + readrq = 128 << |
| + ((le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ) >> 12); |
| + readrq = max(readrq, pcie_get_mps(vdev->pdev)); |
| + |
| + pcie_set_readrq(vdev->pdev, readrq); |
| + } |
| + |
| return count; |
| } |
| |
| @@ -853,11 +875,12 @@ static int __init init_pci_cap_exp_perm( |
| * Allow writes to device control fields, except devctl_phantom, |
| * which could confuse IOMMU, MPS, which can break communication |
| * with other physical devices, and the ARI bit in devctl2, which |
| - * is set at probe time. FLR gets virtualized via our writefn. |
| + * is set at probe time. FLR and MRRS get virtualized via our |
| + * writefn. |
| */ |
| p_setw(perm, PCI_EXP_DEVCTL, |
| - PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD, |
| - ~PCI_EXP_DEVCTL_PHANTOM); |
| + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD | |
| + PCI_EXP_DEVCTL_READRQ, ~PCI_EXP_DEVCTL_PHANTOM); |
| p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); |
| return 0; |
| } |