blob: a70b11fd57d6e2cdb8299befa0a86351a98265b5 [file] [log] [blame]
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2003-2005 Silicon Graphics, Inc. All Rights Reserved.
*/
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/addrs.h>
#include <asm/sn/io.h>
#include <asm/sn/pcidev.h>
#include <asm/sn/pcibus_provider_defs.h>
#include <asm/sn/tioca_provider.h>
u32 tioca_gart_found;
EXPORT_SYMBOL(tioca_gart_found); /* used by agp-sgi */
LIST_HEAD(tioca_list);
EXPORT_SYMBOL(tioca_list); /* used by agp-sgi */
static int tioca_gart_init(struct tioca_kernel *);
/**
* tioca_gart_init - Initialize SGI TIOCA GART
* @tioca_common: ptr to common prom/kernel struct identifying the
*
* If the indicated tioca has devices present, initialize its associated
* GART MMR's and kernel memory.
*/
static int
tioca_gart_init(struct tioca_kernel *tioca_kern)
{
u64 ap_reg;
u64 offset;
struct page *tmp;
struct tioca_common *tioca_common;
struct tioca __iomem *ca_base;
tioca_common = tioca_kern->ca_common;
ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base;
if (list_empty(tioca_kern->ca_devices))
return 0;
ap_reg = 0;
/*
* Validate aperature size
*/
switch (CA_APERATURE_SIZE >> 20) {
case 4:
ap_reg |= (0x3ff << CA_GART_AP_SIZE_SHFT); /* 4MB */
break;
case 8:
ap_reg |= (0x3fe << CA_GART_AP_SIZE_SHFT); /* 8MB */
break;
case 16:
ap_reg |= (0x3fc << CA_GART_AP_SIZE_SHFT); /* 16MB */
break;
case 32:
ap_reg |= (0x3f8 << CA_GART_AP_SIZE_SHFT); /* 32 MB */
break;
case 64:
ap_reg |= (0x3f0 << CA_GART_AP_SIZE_SHFT); /* 64 MB */
break;
case 128:
ap_reg |= (0x3e0 << CA_GART_AP_SIZE_SHFT); /* 128 MB */
break;
case 256:
ap_reg |= (0x3c0 << CA_GART_AP_SIZE_SHFT); /* 256 MB */
break;
case 512:
ap_reg |= (0x380 << CA_GART_AP_SIZE_SHFT); /* 512 MB */
break;
case 1024:
ap_reg |= (0x300 << CA_GART_AP_SIZE_SHFT); /* 1GB */
break;
case 2048:
ap_reg |= (0x200 << CA_GART_AP_SIZE_SHFT); /* 2GB */
break;
case 4096:
ap_reg |= (0x000 << CA_GART_AP_SIZE_SHFT); /* 4 GB */
break;
default:
printk(KERN_ERR "%s: Invalid CA_APERATURE_SIZE "
"0x%lx\n", __func__, (ulong) CA_APERATURE_SIZE);
return -1;
}
/*
* Set up other aperature parameters
*/
if (PAGE_SIZE >= 16384) {
tioca_kern->ca_ap_pagesize = 16384;
ap_reg |= CA_GART_PAGE_SIZE;
} else {
tioca_kern->ca_ap_pagesize = 4096;
}
tioca_kern->ca_ap_size = CA_APERATURE_SIZE;
tioca_kern->ca_ap_bus_base = CA_APERATURE_BASE;
tioca_kern->ca_gart_entries =
tioca_kern->ca_ap_size / tioca_kern->ca_ap_pagesize;
ap_reg |= (CA_GART_AP_ENB_AGP | CA_GART_AP_ENB_PCI);
ap_reg |= tioca_kern->ca_ap_bus_base;
/*
* Allocate and set up the GART
*/
tioca_kern->ca_gart_size = tioca_kern->ca_gart_entries * sizeof(u64);
tmp =
alloc_pages_node(tioca_kern->ca_closest_node,
GFP_KERNEL | __GFP_ZERO,
get_order(tioca_kern->ca_gart_size));
if (!tmp) {
printk(KERN_ERR "%s: Could not allocate "
"%llu bytes (order %d) for GART\n",
__func__,
tioca_kern->ca_gart_size,
get_order(tioca_kern->ca_gart_size));
return -ENOMEM;
}
tioca_kern->ca_gart = page_address(tmp);
tioca_kern->ca_gart_coretalk_addr =
PHYS_TO_TIODMA(virt_to_phys(tioca_kern->ca_gart));
/*
* Compute PCI/AGP convenience fields
*/
offset = CA_PCI32_MAPPED_BASE - CA_APERATURE_BASE;
tioca_kern->ca_pciap_base = CA_PCI32_MAPPED_BASE;
tioca_kern->ca_pciap_size = CA_PCI32_MAPPED_SIZE;
tioca_kern->ca_pcigart_start = offset / tioca_kern->ca_ap_pagesize;
tioca_kern->ca_pcigart_base =
tioca_kern->ca_gart_coretalk_addr + offset;
tioca_kern->ca_pcigart =
&tioca_kern->ca_gart[tioca_kern->ca_pcigart_start];
tioca_kern->ca_pcigart_entries =
tioca_kern->ca_pciap_size / tioca_kern->ca_ap_pagesize;
tioca_kern->ca_pcigart_pagemap =
kzalloc(tioca_kern->ca_pcigart_entries / 8, GFP_KERNEL);
if (!tioca_kern->ca_pcigart_pagemap) {
free_pages((unsigned long)tioca_kern->ca_gart,
get_order(tioca_kern->ca_gart_size));
return -1;
}
offset = CA_AGP_MAPPED_BASE - CA_APERATURE_BASE;
tioca_kern->ca_gfxap_base = CA_AGP_MAPPED_BASE;
tioca_kern->ca_gfxap_size = CA_AGP_MAPPED_SIZE;
tioca_kern->ca_gfxgart_start = offset / tioca_kern->ca_ap_pagesize;
tioca_kern->ca_gfxgart_base =
tioca_kern->ca_gart_coretalk_addr + offset;
tioca_kern->ca_gfxgart =
&tioca_kern->ca_gart[tioca_kern->ca_gfxgart_start];
tioca_kern->ca_gfxgart_entries =
tioca_kern->ca_gfxap_size / tioca_kern->ca_ap_pagesize;
/*
* various control settings:
* use agp op-combining
* use GET semantics to fetch memory
* participate in coherency domain
* DISABLE GART PREFETCHING due to hw bug tracked in SGI PV930029
*/
__sn_setq_relaxed(&ca_base->ca_control1,
CA_AGPDMA_OP_ENB_COMBDELAY); /* PV895469 ? */
__sn_clrq_relaxed(&ca_base->ca_control2, CA_GART_MEM_PARAM);
__sn_setq_relaxed(&ca_base->ca_control2,
(0x2ull << CA_GART_MEM_PARAM_SHFT));
tioca_kern->ca_gart_iscoherent = 1;
__sn_clrq_relaxed(&ca_base->ca_control2,
(CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB));
/*
* Unmask GART fetch error interrupts. Clear residual errors first.
*/
writeq(CA_GART_FETCH_ERR, &ca_base->ca_int_status_alias);
writeq(CA_GART_FETCH_ERR, &ca_base->ca_mult_error_alias);
__sn_clrq_relaxed(&ca_base->ca_int_mask, CA_GART_FETCH_ERR);
/*
* Program the aperature and gart registers in TIOCA
*/
writeq(ap_reg, &ca_base->ca_gart_aperature);
writeq(tioca_kern->ca_gart_coretalk_addr|1, &ca_base->ca_gart_ptr_table);
return 0;
}
/**
* tioca_fastwrite_enable - enable AGP FW for a tioca and its functions
* @tioca_kernel: structure representing the CA
*
* Given a CA, scan all attached functions making sure they all support
* FastWrite. If so, enable FastWrite for all functions and the CA itself.
*/
void
tioca_fastwrite_enable(struct tioca_kernel *tioca_kern)
{
int cap_ptr;
u32 reg;
struct tioca __iomem *tioca_base;
struct pci_dev *pdev;
struct tioca_common *common;
common = tioca_kern->ca_common;
/*
* Scan all vga controllers on this bus making sure they all
* support FW. If not, return.
*/
list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8))
continue;
cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
if (!cap_ptr)
return; /* no AGP CAP means no FW */
pci_read_config_dword(pdev, cap_ptr + PCI_AGP_STATUS, &reg);
if (!(reg & PCI_AGP_STATUS_FW))
return; /* function doesn't support FW */
}
/*
* Set fw for all vga fn's
*/
list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8))
continue;
cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
pci_read_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, &reg);
reg |= PCI_AGP_COMMAND_FW;
pci_write_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, reg);
}
/*
* Set ca's fw to match
*/
tioca_base = (struct tioca __iomem*)common->ca_common.bs_base;
__sn_setq_relaxed(&tioca_base->ca_control1, CA_AGP_FW_ENABLE);
}
EXPORT_SYMBOL(tioca_fastwrite_enable); /* used by agp-sgi */
/**
* tioca_dma_d64 - create a DMA mapping using 64-bit direct mode
* @paddr: system physical address
*
* Map @paddr into 64-bit CA bus space. No device context is necessary.
* Bits 53:0 come from the coretalk address. We just need to mask in the
* following optional bits of the 64-bit pci address:
*
* 63:60 - Coretalk Packet Type - 0x1 for Mem Get/Put (coherent)
* 0x2 for PIO (non-coherent)
* We will always use 0x1
* 55:55 - Swap bytes Currently unused
*/
static u64
tioca_dma_d64(unsigned long paddr)
{
dma_addr_t bus_addr;
bus_addr = PHYS_TO_TIODMA(paddr);
BUG_ON(!bus_addr);
BUG_ON(bus_addr >> 54);
/* Set upper nibble to Cache Coherent Memory op */
bus_addr |= (1UL << 60);
return bus_addr;
}
/**
* tioca_dma_d48 - create a DMA mapping using 48-bit direct mode
* @pdev: linux pci_dev representing the function
* @paddr: system physical address
*
* Map @paddr into 64-bit bus space of the CA associated with @pcidev_info.
*
* The CA agp 48 bit direct address falls out as follows:
*
* When direct mapping AGP addresses, the 48 bit AGP address is
* constructed as follows:
*
* [47:40] - Low 8 bits of the page Node ID extracted from coretalk
* address [47:40]. The upper 8 node bits are fixed
* and come from the xxx register bits [5:0]
* [39:38] - Chiplet ID extracted from coretalk address [39:38]
* [37:00] - node offset extracted from coretalk address [37:00]
*
* Since the node id in general will be non-zero, and the chiplet id
* will always be non-zero, it follows that the device must support
* a dma mask of at least 0xffffffffff (40 bits) to target node 0
* and in general should be 0xffffffffffff (48 bits) to target nodes
* up to 255. Nodes above 255 need the support of the xxx register,
* and so a given CA can only directly target nodes in the range
* xxx - xxx+255.
*/
static u64
tioca_dma_d48(struct pci_dev *pdev, u64 paddr)
{
struct tioca_common *tioca_common;
struct tioca __iomem *ca_base;
u64 ct_addr;
dma_addr_t bus_addr;
u32 node_upper;
u64 agp_dma_extn;
struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base;
ct_addr = PHYS_TO_TIODMA(paddr);
if (!ct_addr)
return 0;
bus_addr = (dma_addr_t) (ct_addr & 0xffffffffffffUL);
node_upper = ct_addr >> 48;
if (node_upper > 64) {
printk(KERN_ERR "%s: coretalk addr 0x%p node id out "
"of range\n", __func__, (void *)ct_addr);
return 0;
}
agp_dma_extn = __sn_readq_relaxed(&ca_base->ca_agp_dma_addr_extn);
if (node_upper != (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)) {
printk(KERN_ERR "%s: coretalk upper node (%u) "
"mismatch with ca_agp_dma_addr_extn (%llu)\n",
__func__,
node_upper, (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT));
return 0;
}
return bus_addr;
}
/**
* tioca_dma_mapped - create a DMA mapping using a CA GART
* @pdev: linux pci_dev representing the function
* @paddr: host physical address to map
* @req_size: len (bytes) to map
*
* Map @paddr into CA address space using the GART mechanism. The mapped
* dma_addr_t is guaranteed to be contiguous in CA bus space.
*/
static dma_addr_t
tioca_dma_mapped(struct pci_dev *pdev, unsigned long paddr, size_t req_size)
{
int ps, ps_shift, entry, entries, mapsize;
u64 xio_addr, end_xio_addr;
struct tioca_common *tioca_common;
struct tioca_kernel *tioca_kern;
dma_addr_t bus_addr = 0;
struct tioca_dmamap *ca_dmamap;
void *map;
unsigned long flags;
struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
xio_addr = PHYS_TO_TIODMA(paddr);
if (!xio_addr)
return 0;
spin_lock_irqsave(&tioca_kern->ca_lock, flags);
/*
* allocate a map struct
*/
ca_dmamap = kzalloc(sizeof(struct tioca_dmamap), GFP_ATOMIC);
if (!ca_dmamap)
goto map_return;
/*
* Locate free entries that can hold req_size. Account for
* unaligned start/length when allocating.
*/
ps = tioca_kern->ca_ap_pagesize; /* will be power of 2 */
ps_shift = ffs(ps) - 1;
end_xio_addr = xio_addr + req_size - 1;
entries = (end_xio_addr >> ps_shift) - (xio_addr >> ps_shift) + 1;
map = tioca_kern->ca_pcigart_pagemap;
mapsize = tioca_kern->ca_pcigart_entries;
entry = bitmap_find_next_zero_area(map, mapsize, 0, entries, 0);
if (entry >= mapsize) {
kfree(ca_dmamap);
goto map_return;
}
bitmap_set(map, entry, entries);
bus_addr = tioca_kern->ca_pciap_base + (entry * ps);
ca_dmamap->cad_dma_addr = bus_addr;
ca_dmamap->cad_gart_size = entries;
ca_dmamap->cad_gart_entry = entry;
list_add(&ca_dmamap->cad_list, &tioca_kern->ca_dmamaps);
if (xio_addr % ps) {
tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);
bus_addr += xio_addr & (ps - 1);
xio_addr &= ~(ps - 1);
xio_addr += ps;
entry++;
}
while (xio_addr < end_xio_addr) {
tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);
xio_addr += ps;
entry++;
}
tioca_tlbflush(tioca_kern);
map_return:
spin_unlock_irqrestore(&tioca_kern->ca_lock, flags);
return bus_addr;
}
/**
* tioca_dma_unmap - release CA mapping resources
* @pdev: linux pci_dev representing the function
* @bus_addr: bus address returned by an earlier tioca_dma_map
* @dir: mapping direction (unused)
*
* Locate mapping resources associated with @bus_addr and release them.
* For mappings created using the direct modes (64 or 48) there are no
* resources to release.
*/
static void
tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
{
int i, entry;
struct tioca_common *tioca_common;
struct tioca_kernel *tioca_kern;
struct tioca_dmamap *map;
struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
unsigned long flags;
tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
/* return straight away if this isn't be a mapped address */
if (bus_addr < tioca_kern->ca_pciap_base ||
bus_addr >= (tioca_kern->ca_pciap_base + tioca_kern->ca_pciap_size))
return;
spin_lock_irqsave(&tioca_kern->ca_lock, flags);
list_for_each_entry(map, &tioca_kern->ca_dmamaps, cad_list)
if (map->cad_dma_addr == bus_addr)
break;
BUG_ON(map == NULL);
entry = map->cad_gart_entry;
for (i = 0; i < map->cad_gart_size; i++, entry++) {
clear_bit(entry, tioca_kern->ca_pcigart_pagemap);
tioca_kern->ca_pcigart[entry] = 0;
}
tioca_tlbflush(tioca_kern);
list_del(&map->cad_list);
spin_unlock_irqrestore(&tioca_kern->ca_lock, flags);
kfree(map);
}
/**
* tioca_dma_map - map pages for PCI DMA
* @pdev: linux pci_dev representing the function
* @paddr: host physical address to map
* @byte_count: bytes to map
*
* This is the main wrapper for mapping host physical pages to CA PCI space.
* The mapping mode used is based on the devices dma_mask. As a last resort
* use the GART mapped mode.
*/
static u64
tioca_dma_map(struct pci_dev *pdev, unsigned long paddr, size_t byte_count, int dma_flags)
{
u64 mapaddr;
/*
* Not supported for now ...
*/
if (dma_flags & SN_DMA_MSI)
return 0;
/*
* If card is 64 or 48 bit addressable, use a direct mapping. 32
* bit direct is so restrictive w.r.t. where the memory resides that
* we don't use it even though CA has some support.
*/
if (pdev->dma_mask == ~0UL)
mapaddr = tioca_dma_d64(paddr);
else if (pdev->dma_mask == 0xffffffffffffUL)
mapaddr = tioca_dma_d48(pdev, paddr);
else
mapaddr = 0;
/* Last resort ... use PCI portion of CA GART */
if (mapaddr == 0)
mapaddr = tioca_dma_mapped(pdev, paddr, byte_count);
return mapaddr;
}
/**
* tioca_error_intr_handler - SGI TIO CA error interrupt handler
* @irq: unused
* @arg: pointer to tioca_common struct for the given CA
*
* Handle a CA error interrupt. Simply a wrapper around a SAL call which
* defers processing to the SGI prom.
*/
static irqreturn_t
tioca_error_intr_handler(int irq, void *arg)
{
struct tioca_common *soft = arg;
struct ia64_sal_retval ret_stuff;
u64 segment;
u64 busnum;
ret_stuff.status = 0;
ret_stuff.v0 = 0;
segment = soft->ca_common.bs_persist_segment;
busnum = soft->ca_common.bs_persist_busnum;
SAL_CALL_NOLOCK(ret_stuff,
(u64) SN_SAL_IOIF_ERROR_INTERRUPT,
segment, busnum, 0, 0, 0, 0, 0);
return IRQ_HANDLED;
}
/**
* tioca_bus_fixup - perform final PCI fixup for a TIO CA bus
* @prom_bussoft: Common prom/kernel struct representing the bus
*
* Replicates the tioca_common pointed to by @prom_bussoft in kernel
* space. Allocates and initializes a kernel-only area for a given CA,
* and sets up an irq for handling CA error interrupts.
*
* On successful setup, returns the kernel version of tioca_common back to
* the caller.
*/
static void *
tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
{
struct tioca_common *tioca_common;
struct tioca_kernel *tioca_kern;
struct pci_bus *bus;
/* sanity check prom rev */
if (is_shub1() && sn_sal_rev() < 0x0406) {
printk
(KERN_ERR "%s: SGI prom rev 4.06 or greater required "
"for tioca support\n", __func__);
return NULL;
}
/*
* Allocate kernel bus soft and copy from prom.
*/
tioca_common = kmemdup(prom_bussoft, sizeof(struct tioca_common),
GFP_KERNEL);
if (!tioca_common)
return NULL;
tioca_common->ca_common.bs_base = (unsigned long)
ioremap(REGION_OFFSET(tioca_common->ca_common.bs_base),
sizeof(struct tioca_common));
/* init kernel-private area */
tioca_kern = kzalloc(sizeof(struct tioca_kernel), GFP_KERNEL);
if (!tioca_kern) {
kfree(tioca_common);
return NULL;
}
tioca_kern->ca_common = tioca_common;
spin_lock_init(&tioca_kern->ca_lock);
INIT_LIST_HEAD(&tioca_kern->ca_dmamaps);
tioca_kern->ca_closest_node =
nasid_to_cnodeid(tioca_common->ca_closest_nasid);
tioca_common->ca_kernel_private = (u64) tioca_kern;
bus = pci_find_bus(tioca_common->ca_common.bs_persist_segment,
tioca_common->ca_common.bs_persist_busnum);
BUG_ON(!bus);
tioca_kern->ca_devices = &bus->devices;
/* init GART */
if (tioca_gart_init(tioca_kern) < 0) {
kfree(tioca_kern);
kfree(tioca_common);
return NULL;
}
tioca_gart_found++;
list_add(&tioca_kern->ca_list, &tioca_list);
if (request_irq(SGI_TIOCA_ERROR,
tioca_error_intr_handler,
IRQF_SHARED, "TIOCA error", (void *)tioca_common))
printk(KERN_WARNING
"%s: Unable to get irq %d. "
"Error interrupts won't be routed for TIOCA bus %d\n",
__func__, SGI_TIOCA_ERROR,
(int)tioca_common->ca_common.bs_persist_busnum);
irq_set_handler(SGI_TIOCA_ERROR, handle_level_irq);
sn_set_err_irq_affinity(SGI_TIOCA_ERROR);
/* Setup locality information */
controller->node = tioca_kern->ca_closest_node;
return tioca_common;
}
static struct sn_pcibus_provider tioca_pci_interfaces = {
.dma_map = tioca_dma_map,
.dma_map_consistent = tioca_dma_map,
.dma_unmap = tioca_dma_unmap,
.bus_fixup = tioca_bus_fixup,
.force_interrupt = NULL,
.target_interrupt = NULL
};
/**
* tioca_init_provider - init SN PCI provider ops for TIO CA
*/
int
tioca_init_provider(void)
{
sn_pci_provider[PCIIO_ASIC_TYPE_TIOCA] = &tioca_pci_interfaces;
return 0;
}