| From 3269ee0bd6686baf86630300d528500ac5b516d7 Mon Sep 17 00:00:00 2001 |
| From: Alex Williamson <alex.williamson@redhat.com> |
| Date: Sat, 15 Jun 2013 10:27:19 -0600 |
| Subject: intel-iommu: Fix leaks in pagetable freeing |
| |
| From: Alex Williamson <alex.williamson@redhat.com> |
| |
| commit 3269ee0bd6686baf86630300d528500ac5b516d7 upstream. |
| |
| At best the current code only seems to free the leaf pagetables and |
| the root. If you're unlucky enough to have a large gap (like any |
| QEMU guest with more than 3G of memory), only the first chunk of leaf |
| pagetables is freed (plus the root). This is a massive memory leak. |
| This patch re-writes the pagetable freeing function to use a |
| recursive algorithm and manages to not only free all the pagetables, |
| but does it without any apparent performance loss versus the current |
| broken version. |
| |
| Signed-off-by: Alex Williamson <alex.williamson@redhat.com> |
| Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com> |
| Signed-off-by: Joerg Roedel <joro@8bytes.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| drivers/iommu/intel-iommu.c | 74 +++++++++++++++++++++----------------------- |
| 1 file changed, 36 insertions(+), 38 deletions(-) |
| |
| --- a/drivers/iommu/intel-iommu.c |
| +++ b/drivers/iommu/intel-iommu.c |
| @@ -890,56 +890,54 @@ static int dma_pte_clear_range(struct dm |
| return order; |
| } |
| |
| +/* Recursively free page-table pages fully covered by [start_pfn, last_pfn] */ |
| +static void dma_pte_free_level(struct dmar_domain *domain, int level, |
| + struct dma_pte *pte, unsigned long pfn, |
| + unsigned long start_pfn, unsigned long last_pfn) |
| +{ |
| + pfn = max(start_pfn, pfn); |
| + pte = &pte[pfn_level_offset(pfn, level)]; |
| + |
| + do { |
| + unsigned long level_pfn; |
| + struct dma_pte *level_pte; |
| + |
| + if (!dma_pte_present(pte) || dma_pte_superpage(pte)) |
| + goto next; |
| + /* Entry spans level_size(level) pfns starting at level_pfn */ |
| + level_pfn = pfn & level_mask(level); |
| + level_pte = phys_to_virt(dma_pte_addr(pte)); |
| + if (level > 2) |
| + dma_pte_free_level(domain, level - 1, level_pte, |
| + level_pfn, start_pfn, last_pfn); |
| + |
| + /* Free only if the (inclusive) range covers the whole span */ |
| + if (!(start_pfn > level_pfn || |
| + last_pfn < level_pfn + level_size(level) - 1)) { |
| + dma_clear_pte(pte); |
| + domain_flush_cache(domain, pte, sizeof(*pte)); |
| + free_pgtable_page(level_pte); |
| + } |
| +next: |
| + pfn += level_size(level); |
| + } while (!first_pte_in_page(++pte) && pfn <= last_pfn); |
| +} |
| + |
| /* free page table pages. last level pte should already be cleared */ |
| static void dma_pte_free_pagetable(struct dmar_domain *domain, |
| unsigned long start_pfn, |
| unsigned long last_pfn) |
| { |
| int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; |
| - struct dma_pte *first_pte, *pte; |
| - int total = agaw_to_level(domain->agaw); |
| - int level; |
| - unsigned long tmp; |
| - int large_page = 2; |
| |
| BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); |
| BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); |
| BUG_ON(start_pfn > last_pfn); |
| |
| /* We don't need lock here; nobody else touches the iova range */ |
| - level = 2; |
| - while (level <= total) { |
| - tmp = align_to_level(start_pfn, level); |
| - |
| - /* If we can't even clear one PTE at this level, we're done */ |
| - if (tmp + level_size(level) - 1 > last_pfn) |
| - return; |
| - |
| - do { |
| - large_page = level; |
| - first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); |
| - if (large_page > level) |
| - level = large_page + 1; |
| - if (!pte) { |
| - tmp = align_to_level(tmp + 1, level + 1); |
| - continue; |
| - } |
| - do { |
| - if (dma_pte_present(pte)) { |
| - free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); |
| - dma_clear_pte(pte); |
| - } |
| - pte++; |
| - tmp += level_size(level); |
| - } while (!first_pte_in_page(pte) && |
| - tmp + level_size(level) - 1 <= last_pfn); |
| - |
| - domain_flush_cache(domain, first_pte, |
| - (void *)pte - (void *)first_pte); |
| - |
| - } while (tmp && tmp + level_size(level) - 1 <= last_pfn); |
| - level++; |
| - } |
| + dma_pte_free_level(domain, agaw_to_level(domain->agaw), |
| + domain->pgd, 0, start_pfn, last_pfn); |
| + |
| /* free pgd */ |
| if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { |
| free_pgtable_page(domain->pgd); |