|  | // SPDX-License-Identifier: GPL-2.0-only | 
|  | /* | 
|  | * IOMMU API for RISC-V IOMMU implementations. | 
|  | * | 
|  | * Copyright © 2022-2024 Rivos Inc. | 
|  | * Copyright © 2023 FORTH-ICS/CARV | 
|  | * | 
|  | * Authors | 
|  | *	Tomasz Jeznach <tjeznach@rivosinc.com> | 
|  | *	Nick Kossifidis <mick@ics.forth.gr> | 
|  | */ | 
|  |  | 
|  | #define pr_fmt(fmt) "riscv-iommu: " fmt | 
|  |  | 
|  | #include <linux/compiler.h> | 
|  | #include <linux/crash_dump.h> | 
|  | #include <linux/init.h> | 
|  | #include <linux/iommu.h> | 
|  | #include <linux/iopoll.h> | 
|  | #include <linux/kernel.h> | 
|  | #include <linux/pci.h> | 
|  |  | 
|  | #include "../iommu-pages.h" | 
|  | #include "iommu-bits.h" | 
|  | #include "iommu.h" | 
|  |  | 
|  | /* Timeouts in [us] */ | 
|  | #define RISCV_IOMMU_QCSR_TIMEOUT	150000 | 
|  | #define RISCV_IOMMU_QUEUE_TIMEOUT	150000 | 
|  | #define RISCV_IOMMU_DDTP_TIMEOUT	10000000 | 
|  | #define RISCV_IOMMU_IOTINVAL_TIMEOUT	90000000 | 
|  |  | 
|  | /* Number of entries per CMD/FLT queue, should be <= INT_MAX */ | 
|  | #define RISCV_IOMMU_DEF_CQ_COUNT	8192 | 
|  | #define RISCV_IOMMU_DEF_FQ_COUNT	4096 | 
|  |  | 
|  | /* RISC-V IOMMU PPN <> PHYS address conversions, PHYS <=> PPN[53:10] */ | 
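|  | /* Example: a 4K-aligned phys 0x80200000 (PPN 0x80200) is stored in the register as 0x80200 << 10 == 0x20080000. */ | 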
|  | #define phys_to_ppn(pa)  (((pa) >> 2) & (((1ULL << 44) - 1) << 10)) | 
|  | #define ppn_to_phys(pn)	 (((pn) << 2) & (((1ULL << 44) - 1) << 12)) | 
|  |  | 
|  | #define dev_to_iommu(dev) \ | 
|  | iommu_get_iommu_dev(dev, struct riscv_iommu_device, iommu) | 
|  |  | 
|  | /* IOMMU PSCID allocation namespace. */ | 
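|  | /* PSCID values are 20 bits wide; a single global namespace is shared by all IOMMU instances. */ | 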
|  | static DEFINE_IDA(riscv_iommu_pscids); | 
|  | #define RISCV_IOMMU_MAX_PSCID		(BIT(20) - 1) | 
|  |  | 
|  | /* Device resource-managed allocations */ | 
|  | struct riscv_iommu_devres { | 
|  | void *addr; | 
|  | }; | 
|  |  | 
|  | static void riscv_iommu_devres_pages_release(struct device *dev, void *res) | 
|  | { | 
|  | struct riscv_iommu_devres *devres = res; | 
|  |  | 
|  | iommu_free_pages(devres->addr); | 
|  | } | 
|  |  | 
|  | static int riscv_iommu_devres_pages_match(struct device *dev, void *res, void *p) | 
|  | { | 
|  | struct riscv_iommu_devres *devres = res; | 
|  | struct riscv_iommu_devres *target = p; | 
|  |  | 
|  | return devres->addr == target->addr; | 
|  | } | 
|  |  | 
|  | static void *riscv_iommu_get_pages(struct riscv_iommu_device *iommu, | 
|  | unsigned int size) | 
|  | { | 
|  | struct riscv_iommu_devres *devres; | 
|  | void *addr; | 
|  |  | 
|  | addr = iommu_alloc_pages_node_sz(dev_to_node(iommu->dev), | 
|  | GFP_KERNEL_ACCOUNT, size); | 
|  | if (unlikely(!addr)) | 
|  | return NULL; | 
|  |  | 
|  | devres = devres_alloc(riscv_iommu_devres_pages_release, | 
|  | sizeof(struct riscv_iommu_devres), GFP_KERNEL); | 
|  |  | 
|  | if (unlikely(!devres)) { | 
|  | iommu_free_pages(addr); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | devres->addr = addr; | 
|  |  | 
|  | devres_add(iommu->dev, devres); | 
|  |  | 
|  | return addr; | 
|  | } | 
|  |  | 
|  | static void riscv_iommu_free_pages(struct riscv_iommu_device *iommu, void *addr) | 
|  | { | 
|  | struct riscv_iommu_devres devres = { .addr = addr }; | 
|  |  | 
|  | devres_release(iommu->dev, riscv_iommu_devres_pages_release, | 
|  | riscv_iommu_devres_pages_match, &devres); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Hardware queue allocation and management. | 
|  | */ | 
|  |  | 
|  | /* Setup queue base, control registers and default queue length */ | 
|  | #define RISCV_IOMMU_QUEUE_INIT(q, name) do {				\ | 
|  | struct riscv_iommu_queue *_q = q;				\ | 
|  | _q->qid = RISCV_IOMMU_INTR_ ## name;				\ | 
|  | _q->qbr = RISCV_IOMMU_REG_ ## name ## B;			\ | 
|  | _q->qcr = RISCV_IOMMU_REG_ ## name ## CSR;			\ | 
|  | _q->mask = _q->mask ?: (RISCV_IOMMU_DEF_ ## name ## _COUNT) - 1;\ | 
|  | } while (0) | 
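|  | /* | 
|  | * Example: RISCV_IOMMU_QUEUE_INIT(&iommu->cmdq, CQ) expands to qid RISCV_IOMMU_INTR_CQ, | 
|  | * base register RISCV_IOMMU_REG_CQB, control register RISCV_IOMMU_REG_CQCSR and a | 
|  | * default mask of RISCV_IOMMU_DEF_CQ_COUNT - 1. | 
|  | */ | 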
|  |  | 
|  | /* Note: offsets are the same for all queues */ | 
|  | #define Q_HEAD(q) ((q)->qbr + (RISCV_IOMMU_REG_CQH - RISCV_IOMMU_REG_CQB)) | 
|  | #define Q_TAIL(q) ((q)->qbr + (RISCV_IOMMU_REG_CQT - RISCV_IOMMU_REG_CQB)) | 
|  | #define Q_ITEM(q, index) ((q)->mask & (index)) | 
|  | #define Q_IPSR(q) BIT((q)->qid) | 
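|  | /* | 
|  | * Head/tail indices are free-running counters; Q_ITEM() masks them into | 
|  | * ring buffer slots. Q_IPSR() relies on the queue id matching the bit | 
|  | * position of that queue's interrupt-pending flag in the IPSR register. | 
|  | */ | 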
|  |  | 
|  | /* | 
|  | * Discover queue ring buffer hardware configuration, allocate in-memory | 
|  | * ring buffer or use fixed I/O memory location, configure queue base register. | 
|  | * Must be called before hardware queue is enabled. | 
|  | * | 
|  | * @queue - data structure, configured with RISCV_IOMMU_QUEUE_INIT() | 
|  | * @entry_size - queue single element size in bytes. | 
|  | */ | 
|  | static int riscv_iommu_queue_alloc(struct riscv_iommu_device *iommu, | 
|  | struct riscv_iommu_queue *queue, | 
|  | size_t entry_size) | 
|  | { | 
|  | unsigned int logsz; | 
|  | u64 qb, rb; | 
|  |  | 
|  | /* | 
|  | * Use WARL base register property to discover maximum allowed | 
|  | * number of entries and optional fixed IO address for queue location. | 
|  | */ | 
|  | riscv_iommu_writeq(iommu, queue->qbr, RISCV_IOMMU_QUEUE_LOG2SZ_FIELD); | 
|  | qb = riscv_iommu_readq(iommu, queue->qbr); | 
|  |  | 
|  | /* | 
|  | * Calculate and verify hardware supported queue length, as reported | 
|  | * by the field LOG2SZ, where max queue length is equal to 2^(LOG2SZ + 1). | 
|  | * Update queue size based on hardware supported value. | 
|  | */ | 
|  | logsz = ilog2(queue->mask); | 
|  | if (logsz > FIELD_GET(RISCV_IOMMU_QUEUE_LOG2SZ_FIELD, qb)) | 
|  | logsz = FIELD_GET(RISCV_IOMMU_QUEUE_LOG2SZ_FIELD, qb); | 
|  |  | 
|  | /* | 
|  | * Use WARL base register property to discover an optional fixed IO | 
|  | * address for queue ring buffer location. Otherwise allocate contiguous | 
|  | * system memory. | 
|  | */ | 
|  | if (FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb)) { | 
|  | const size_t queue_size = entry_size << (logsz + 1); | 
|  |  | 
|  | queue->phys = pfn_to_phys(FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb)); | 
|  | queue->base = devm_ioremap(iommu->dev, queue->phys, queue_size); | 
|  | } else { | 
|  | do { | 
|  | const size_t queue_size = entry_size << (logsz + 1); | 
|  |  | 
|  | queue->base = riscv_iommu_get_pages( | 
|  | iommu, max(queue_size, SZ_4K)); | 
|  | queue->phys = __pa(queue->base); | 
|  | } while (!queue->base && logsz-- > 0); | 
|  | } | 
|  |  | 
|  | if (!queue->base) | 
|  | return -ENOMEM; | 
|  |  | 
|  | qb = phys_to_ppn(queue->phys) | | 
|  | FIELD_PREP(RISCV_IOMMU_QUEUE_LOG2SZ_FIELD, logsz); | 
|  |  | 
|  | /* Update base register and read back to verify hw accepted our write */ | 
|  | riscv_iommu_writeq(iommu, queue->qbr, qb); | 
|  | rb = riscv_iommu_readq(iommu, queue->qbr); | 
|  | if (rb != qb) { | 
|  | dev_err(iommu->dev, "queue #%u allocation failed\n", queue->qid); | 
|  | return -ENODEV; | 
|  | } | 
|  |  | 
|  | /* Update actual queue mask */ | 
|  | queue->mask = (2U << logsz) - 1; | 
|  |  | 
|  | dev_dbg(iommu->dev, "queue #%u allocated 2^%u entries\n", | 
|  | queue->qid, logsz + 1); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* Check interrupt queue status, IPSR */ | 
|  | static irqreturn_t riscv_iommu_queue_ipsr(int irq, void *data) | 
|  | { | 
|  | struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data; | 
|  |  | 
|  | if (riscv_iommu_readl(queue->iommu, RISCV_IOMMU_REG_IPSR) & Q_IPSR(queue)) | 
|  | return IRQ_WAKE_THREAD; | 
|  |  | 
|  | return IRQ_NONE; | 
|  | } | 
|  |  | 
|  | static int riscv_iommu_queue_vec(struct riscv_iommu_device *iommu, int n) | 
|  | { | 
|  | /* Each ICVEC vector field is 4 bits wide; reuse the ICVEC.CIV mask for all interrupt vector mappings. */ | 
|  | return (iommu->icvec >> (n * 4)) & RISCV_IOMMU_ICVEC_CIV; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Enable queue processing in the hardware, register interrupt handler. | 
|  | * | 
|  | * @queue - data structure, already allocated with riscv_iommu_queue_alloc() | 
|  | * @irq_handler - threaded interrupt handler. | 
|  | */ | 
|  | static int riscv_iommu_queue_enable(struct riscv_iommu_device *iommu, | 
|  | struct riscv_iommu_queue *queue, | 
|  | irq_handler_t irq_handler) | 
|  | { | 
|  | const unsigned int irq = iommu->irqs[riscv_iommu_queue_vec(iommu, queue->qid)]; | 
|  | u32 csr; | 
|  | int rc; | 
|  |  | 
|  | if (queue->iommu) | 
|  | return -EBUSY; | 
|  |  | 
|  | /* Polling not implemented */ | 
|  | if (!irq) | 
|  | return -ENODEV; | 
|  |  | 
|  | queue->iommu = iommu; | 
|  | rc = request_threaded_irq(irq, riscv_iommu_queue_ipsr, irq_handler, | 
|  | IRQF_ONESHOT | IRQF_SHARED, | 
|  | dev_name(iommu->dev), queue); | 
|  | if (rc) { | 
|  | queue->iommu = NULL; | 
|  | return rc; | 
|  | } | 
|  |  | 
|  | /* Empty queue before enabling it */ | 
|  | if (queue->qid == RISCV_IOMMU_INTR_CQ) | 
|  | riscv_iommu_writel(queue->iommu, Q_TAIL(queue), 0); | 
|  | else | 
|  | riscv_iommu_writel(queue->iommu, Q_HEAD(queue), 0); | 
|  |  | 
|  | /* | 
|  | * Enable the queue with interrupts and clear any pending memory fault. | 
|  | * Wait for the hardware to acknowledge request and activate queue | 
|  | * processing. | 
|  | * Note: All CSR bitfields are in the same offsets for all queues. | 
|  | */ | 
|  | riscv_iommu_writel(iommu, queue->qcr, | 
|  | RISCV_IOMMU_QUEUE_ENABLE | | 
|  | RISCV_IOMMU_QUEUE_INTR_ENABLE | | 
|  | RISCV_IOMMU_QUEUE_MEM_FAULT); | 
|  |  | 
|  | riscv_iommu_readl_timeout(iommu, queue->qcr, | 
|  | csr, !(csr & RISCV_IOMMU_QUEUE_BUSY), | 
|  | 10, RISCV_IOMMU_QCSR_TIMEOUT); | 
|  |  | 
|  | if (RISCV_IOMMU_QUEUE_ACTIVE != (csr & (RISCV_IOMMU_QUEUE_ACTIVE | | 
|  | RISCV_IOMMU_QUEUE_BUSY | | 
|  | RISCV_IOMMU_QUEUE_MEM_FAULT))) { | 
|  | /* Best effort to stop and disable failing hardware queue. */ | 
|  | riscv_iommu_writel(iommu, queue->qcr, 0); | 
|  | free_irq(irq, queue); | 
|  | queue->iommu = NULL; | 
|  | dev_err(iommu->dev, "queue #%u failed to start\n", queue->qid); | 
|  | return -EBUSY; | 
|  | } | 
|  |  | 
|  | /* Clear any pending interrupt flag. */ | 
|  | riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue)); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Disable queue. Wait for the hardware to acknowledge request and | 
|  | * stop processing enqueued requests. Report errors but continue. | 
|  | */ | 
|  | static void riscv_iommu_queue_disable(struct riscv_iommu_queue *queue) | 
|  | { | 
|  | struct riscv_iommu_device *iommu = queue->iommu; | 
|  | u32 csr; | 
|  |  | 
|  | if (!iommu) | 
|  | return; | 
|  |  | 
|  | free_irq(iommu->irqs[riscv_iommu_queue_vec(iommu, queue->qid)], queue); | 
|  | riscv_iommu_writel(iommu, queue->qcr, 0); | 
|  | riscv_iommu_readl_timeout(iommu, queue->qcr, | 
|  | csr, !(csr & RISCV_IOMMU_QUEUE_BUSY), | 
|  | 10, RISCV_IOMMU_QCSR_TIMEOUT); | 
|  |  | 
|  | if (csr & (RISCV_IOMMU_QUEUE_ACTIVE | RISCV_IOMMU_QUEUE_BUSY)) | 
|  | dev_err(iommu->dev, "fail to disable hardware queue #%u, csr 0x%x\n", | 
|  | queue->qid, csr); | 
|  |  | 
|  | queue->iommu = NULL; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Returns number of available valid queue entries and the first item index. | 
|  | * Update shadow producer index if necessary. | 
|  | */ | 
|  | static int riscv_iommu_queue_consume(struct riscv_iommu_queue *queue, | 
|  | unsigned int *index) | 
|  | { | 
|  | unsigned int head = atomic_read(&queue->head); | 
|  | unsigned int tail = atomic_read(&queue->tail); | 
|  | unsigned int last = Q_ITEM(queue, tail); | 
|  | int available = (int)(tail - head); | 
|  |  | 
|  | *index = head; | 
|  |  | 
|  | if (available > 0) | 
|  | return available; | 
|  |  | 
|  | /* read hardware producer index, check reserved register bits are not set. */ | 
|  | if (riscv_iommu_readl_timeout(queue->iommu, Q_TAIL(queue), | 
|  | tail, (tail & ~queue->mask) == 0, | 
|  | 0, RISCV_IOMMU_QUEUE_TIMEOUT)) { | 
|  | dev_err_once(queue->iommu->dev, | 
|  | "Hardware error: queue access timeout\n"); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | if (tail == last) | 
|  | return 0; | 
|  |  | 
|  | /* update shadow producer index */ | 
|  | return (int)(atomic_add_return((tail - last) & queue->mask, &queue->tail) - head); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Release processed queue entries, should match riscv_iommu_queue_consume() calls. | 
|  | */ | 
|  | static void riscv_iommu_queue_release(struct riscv_iommu_queue *queue, int count) | 
|  | { | 
|  | const unsigned int head = atomic_add_return(count, &queue->head); | 
|  |  | 
|  | riscv_iommu_writel(queue->iommu, Q_HEAD(queue), Q_ITEM(queue, head)); | 
|  | } | 
|  |  | 
|  | /* Return actual consumer index based on hardware reported queue head index. */ | 
|  | static unsigned int riscv_iommu_queue_cons(struct riscv_iommu_queue *queue) | 
|  | { | 
|  | const unsigned int cons = atomic_read(&queue->head); | 
|  | const unsigned int last = Q_ITEM(queue, cons); | 
|  | unsigned int head; | 
|  |  | 
|  | if (riscv_iommu_readl_timeout(queue->iommu, Q_HEAD(queue), head, | 
|  | !(head & ~queue->mask), | 
|  | 0, RISCV_IOMMU_QUEUE_TIMEOUT)) | 
|  | return cons; | 
|  |  | 
|  | return cons + ((head - last) & queue->mask); | 
|  | } | 
|  |  | 
|  | /* Wait for submitted item to be processed. */ | 
|  | static int riscv_iommu_queue_wait(struct riscv_iommu_queue *queue, | 
|  | unsigned int index, | 
|  | unsigned int timeout_us) | 
|  | { | 
|  | unsigned int cons = atomic_read(&queue->head); | 
|  |  | 
|  | /* Already processed by the consumer */ | 
|  | if ((int)(cons - index) > 0) | 
|  | return 0; | 
|  |  | 
|  | /* Monitor consumer index */ | 
|  | return readx_poll_timeout(riscv_iommu_queue_cons, queue, cons, | 
|  | (int)(cons - index) > 0, 0, timeout_us); | 
|  | } | 
|  |  | 
|  | /* Enqueue an entry and wait to be processed if timeout_us > 0 | 
|  | * | 
|  | * Error handling for IOMMU hardware not responding in reasonable time | 
|  | * will be added as separate patch series along with other RAS features. | 
|  | * For now, only report hardware failure and continue. | 
|  | */ | 
|  | static unsigned int riscv_iommu_queue_send(struct riscv_iommu_queue *queue, | 
|  | void *entry, size_t entry_size) | 
|  | { | 
|  | unsigned int prod; | 
|  | unsigned int head; | 
|  | unsigned int tail; | 
|  | unsigned long flags; | 
|  |  | 
|  | /* Do not preempt submission flow. */ | 
|  | local_irq_save(flags); | 
|  |  | 
|  | /* 1. Allocate some space in the queue */ | 
|  | prod = atomic_inc_return(&queue->prod) - 1; | 
|  | head = atomic_read(&queue->head); | 
|  |  | 
|  | /* 2. Wait for space availability. */ | 
|  | if ((prod - head) > queue->mask) { | 
|  | if (readx_poll_timeout(atomic_read, &queue->head, | 
|  | head, (prod - head) < queue->mask, | 
|  | 0, RISCV_IOMMU_QUEUE_TIMEOUT)) | 
|  | goto err_busy; | 
|  | } else if ((prod - head) == queue->mask) { | 
|  | const unsigned int last = Q_ITEM(queue, head); | 
|  |  | 
|  | if (riscv_iommu_readl_timeout(queue->iommu, Q_HEAD(queue), head, | 
|  | !(head & ~queue->mask) && head != last, | 
|  | 0, RISCV_IOMMU_QUEUE_TIMEOUT)) | 
|  | goto err_busy; | 
|  | atomic_add((head - last) & queue->mask, &queue->head); | 
|  | } | 
|  |  | 
|  | /* 3. Store entry in the ring buffer */ | 
|  | memcpy(queue->base + Q_ITEM(queue, prod) * entry_size, entry, entry_size); | 
|  |  | 
|  | /* 4. Wait for all previous entries to be ready */ | 
|  | if (readx_poll_timeout(atomic_read, &queue->tail, tail, prod == tail, | 
|  | 0, RISCV_IOMMU_QUEUE_TIMEOUT)) | 
|  | goto err_busy; | 
|  |  | 
|  | /* | 
|  | * 5. Make sure the ring buffer update (whether in normal or I/O memory) is | 
|  | *    completed and visible before signaling the tail doorbell to fetch | 
|  | *    the next command. 'fence ow, ow' | 
|  | */ | 
|  | dma_wmb(); | 
|  | riscv_iommu_writel(queue->iommu, Q_TAIL(queue), Q_ITEM(queue, prod + 1)); | 
|  |  | 
|  | /* | 
|  | * 6. Make sure the doorbell write to the device has finished before updating | 
|  | *    the shadow tail index in normal memory. 'fence o, w' | 
|  | */ | 
|  | mmiowb(); | 
|  | atomic_inc(&queue->tail); | 
|  |  | 
|  | /* 7. Complete submission and restore local interrupts */ | 
|  | local_irq_restore(flags); | 
|  |  | 
|  | return prod; | 
|  |  | 
|  | err_busy: | 
|  | local_irq_restore(flags); | 
|  | dev_err_once(queue->iommu->dev, "Hardware error: command enqueue failed\n"); | 
|  |  | 
|  | return prod; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * IOMMU Command queue chapter 3.1 | 
|  | */ | 
|  |  | 
|  | /* Command queue interrupt handler thread function */ | 
|  | static irqreturn_t riscv_iommu_cmdq_process(int irq, void *data) | 
|  | { | 
|  | const struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data; | 
|  | unsigned int ctrl; | 
|  |  | 
|  | /* Clear MF/CQ errors, complete error recovery to be implemented. */ | 
|  | ctrl = riscv_iommu_readl(queue->iommu, queue->qcr); | 
|  | if (ctrl & (RISCV_IOMMU_CQCSR_CQMF | RISCV_IOMMU_CQCSR_CMD_TO | | 
|  | RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_FENCE_W_IP)) { | 
|  | riscv_iommu_writel(queue->iommu, queue->qcr, ctrl); | 
|  | dev_warn(queue->iommu->dev, | 
|  | "Queue #%u error; fault:%d timeout:%d illegal:%d fence_w_ip:%d\n", | 
|  | queue->qid, | 
|  | !!(ctrl & RISCV_IOMMU_CQCSR_CQMF), | 
|  | !!(ctrl & RISCV_IOMMU_CQCSR_CMD_TO), | 
|  | !!(ctrl & RISCV_IOMMU_CQCSR_CMD_ILL), | 
|  | !!(ctrl & RISCV_IOMMU_CQCSR_FENCE_W_IP)); | 
|  | } | 
|  |  | 
|  | /* Placeholder for command queue interrupt notifiers */ | 
|  |  | 
|  | /* Clear command interrupt pending. */ | 
|  | riscv_iommu_writel(queue->iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue)); | 
|  |  | 
|  | return IRQ_HANDLED; | 
|  | } | 
|  |  | 
|  | /* Send command to the IOMMU command queue */ | 
|  | static void riscv_iommu_cmd_send(struct riscv_iommu_device *iommu, | 
|  | struct riscv_iommu_command *cmd) | 
|  | { | 
|  | riscv_iommu_queue_send(&iommu->cmdq, cmd, sizeof(*cmd)); | 
|  | } | 
|  |  | 
|  | /* Send IOFENCE.C command and wait for all scheduled commands to complete. */ | 
|  | static void riscv_iommu_cmd_sync(struct riscv_iommu_device *iommu, | 
|  | unsigned int timeout_us) | 
|  | { | 
|  | struct riscv_iommu_command cmd; | 
|  | unsigned int prod; | 
|  |  | 
|  | riscv_iommu_cmd_iofence(&cmd); | 
|  | prod = riscv_iommu_queue_send(&iommu->cmdq, &cmd, sizeof(cmd)); | 
|  |  | 
|  | if (!timeout_us) | 
|  | return; | 
|  |  | 
|  | if (riscv_iommu_queue_wait(&iommu->cmdq, prod, timeout_us)) | 
|  | dev_err_once(iommu->dev, | 
|  | "Hardware error: command execution timeout\n"); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * IOMMU Fault/Event queue chapter 3.2 | 
|  | */ | 
|  |  | 
|  | static void riscv_iommu_fault(struct riscv_iommu_device *iommu, | 
|  | struct riscv_iommu_fq_record *event) | 
|  | { | 
|  | unsigned int err = FIELD_GET(RISCV_IOMMU_FQ_HDR_CAUSE, event->hdr); | 
|  | unsigned int devid = FIELD_GET(RISCV_IOMMU_FQ_HDR_DID, event->hdr); | 
|  |  | 
|  | /* Placeholder for future fault handling implementation, report only. */ | 
|  | if (err) | 
|  | dev_warn_ratelimited(iommu->dev, | 
|  | "Fault %d devid: 0x%x iotval: %llx iotval2: %llx\n", | 
|  | err, devid, event->iotval, event->iotval2); | 
|  | } | 
|  |  | 
|  | /* Fault queue interrupt handler thread function */ | 
|  | static irqreturn_t riscv_iommu_fltq_process(int irq, void *data) | 
|  | { | 
|  | struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data; | 
|  | struct riscv_iommu_device *iommu = queue->iommu; | 
|  | struct riscv_iommu_fq_record *events; | 
|  | unsigned int ctrl, idx; | 
|  | int cnt, len; | 
|  |  | 
|  | events = (struct riscv_iommu_fq_record *)queue->base; | 
|  |  | 
|  | /* Clear fault interrupt pending and process all received fault events. */ | 
|  | riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue)); | 
|  |  | 
|  | do { | 
|  | cnt = riscv_iommu_queue_consume(queue, &idx); | 
|  | for (len = 0; len < cnt; idx++, len++) | 
|  | riscv_iommu_fault(iommu, &events[Q_ITEM(queue, idx)]); | 
|  | riscv_iommu_queue_release(queue, cnt); | 
|  | } while (cnt > 0); | 
|  |  | 
|  | /* Clear MF/OF errors, complete error recovery to be implemented. */ | 
|  | ctrl = riscv_iommu_readl(iommu, queue->qcr); | 
|  | if (ctrl & (RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF)) { | 
|  | riscv_iommu_writel(iommu, queue->qcr, ctrl); | 
|  | dev_warn(iommu->dev, | 
|  | "Queue #%u error; memory fault:%d overflow:%d\n", | 
|  | queue->qid, | 
|  | !!(ctrl & RISCV_IOMMU_FQCSR_FQMF), | 
|  | !!(ctrl & RISCV_IOMMU_FQCSR_FQOF)); | 
|  | } | 
|  |  | 
|  | return IRQ_HANDLED; | 
|  | } | 
|  |  | 
|  | /* Lookup and initialize device context info structure. */ | 
|  | static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iommu, | 
|  | unsigned int devid) | 
|  | { | 
|  | const bool base_format = !(iommu->caps & RISCV_IOMMU_CAPABILITIES_MSI_FLAT); | 
|  | unsigned int depth; | 
|  | unsigned long ddt, old, new; | 
|  | void *ptr; | 
|  | u8 ddi_bits[3] = { 0 }; | 
|  | u64 *ddtp = NULL; | 
|  |  | 
|  | /* Make sure the mode is valid */ | 
|  | if (iommu->ddt_mode < RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL || | 
|  | iommu->ddt_mode > RISCV_IOMMU_DDTP_IOMMU_MODE_3LVL) | 
|  | return NULL; | 
|  |  | 
|  | /* | 
|  | * Device id partitioning for base format: | 
|  | * DDI[0]: bits 0 - 6   (1st level) (7 bits) | 
|  | * DDI[1]: bits 7 - 15  (2nd level) (9 bits) | 
|  | * DDI[2]: bits 16 - 23 (3rd level) (8 bits) | 
|  | * | 
|  | * For extended format: | 
|  | * DDI[0]: bits 0 - 5   (1st level) (6 bits) | 
|  | * DDI[1]: bits 6 - 14  (2nd level) (9 bits) | 
|  | * DDI[2]: bits 15 - 23 (3rd level) (9 bits) | 
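|  | * | 
|  | * For example, in base format devid 0x12345 splits into | 
|  | * DDI[0] = 0x45, DDI[1] = 0x46 and DDI[2] = 0x1. | 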
|  | */ | 
|  | if (base_format) { | 
|  | ddi_bits[0] = 7; | 
|  | ddi_bits[1] = 7 + 9; | 
|  | ddi_bits[2] = 7 + 9 + 8; | 
|  | } else { | 
|  | ddi_bits[0] = 6; | 
|  | ddi_bits[1] = 6 + 9; | 
|  | ddi_bits[2] = 6 + 9 + 9; | 
|  | } | 
|  |  | 
|  | /* Make sure device id is within range */ | 
|  | depth = iommu->ddt_mode - RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL; | 
|  | if (devid >= (1 << ddi_bits[depth])) | 
|  | return NULL; | 
|  |  | 
|  | /* Get to the level of the non-leaf node that holds the device context */ | 
|  | for (ddtp = iommu->ddt_root; depth-- > 0;) { | 
|  | const int split = ddi_bits[depth]; | 
|  | /* | 
|  | * Each non-leaf node is 64bits wide and on each level | 
|  | * nodes are indexed by DDI[depth]. | 
|  | */ | 
|  | ddtp += (devid >> split) & 0x1FF; | 
|  |  | 
|  | /* | 
|  | * Check if this node has been populated and if not | 
|  | * allocate a new level and populate it. | 
|  | */ | 
|  | do { | 
|  | ddt = READ_ONCE(*(unsigned long *)ddtp); | 
|  | if (ddt & RISCV_IOMMU_DDTE_V) { | 
|  | ddtp = __va(ppn_to_phys(ddt)); | 
|  | break; | 
|  | } | 
|  |  | 
|  | ptr = riscv_iommu_get_pages(iommu, SZ_4K); | 
|  | if (!ptr) | 
|  | return NULL; | 
|  |  | 
|  | new = phys_to_ppn(__pa(ptr)) | RISCV_IOMMU_DDTE_V; | 
|  | old = cmpxchg_relaxed((unsigned long *)ddtp, ddt, new); | 
|  |  | 
|  | if (old == ddt) { | 
|  | ddtp = (u64 *)ptr; | 
|  | break; | 
|  | } | 
|  |  | 
|  | /* Race setting DDT detected, re-read and retry. */ | 
|  | riscv_iommu_free_pages(iommu, ptr); | 
|  | } while (1); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Grab the node that matches DDI[depth], note that when using base | 
|  | * format the device context is 4 * 64bits, and the extended format | 
|  | * is 8 * 64bits, hence the (3 - base_format) below. | 
|  | */ | 
|  | ddtp += (devid & ((64 << base_format) - 1)) << (3 - base_format); | 
|  |  | 
|  | return (struct riscv_iommu_dc *)ddtp; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * This is best effort IOMMU translation shutdown flow. | 
|  | * Disable IOMMU without waiting for hardware response. | 
|  | */ | 
|  | void riscv_iommu_disable(struct riscv_iommu_device *iommu) | 
|  | { | 
|  | riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, | 
|  | FIELD_PREP(RISCV_IOMMU_DDTP_IOMMU_MODE, | 
|  | RISCV_IOMMU_DDTP_IOMMU_MODE_BARE)); | 
|  | riscv_iommu_writel(iommu, RISCV_IOMMU_REG_CQCSR, 0); | 
|  | riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FQCSR, 0); | 
|  | riscv_iommu_writel(iommu, RISCV_IOMMU_REG_PQCSR, 0); | 
|  | } | 
|  |  | 
|  | #define riscv_iommu_read_ddtp(iommu) ({ \ | 
|  | u64 ddtp; \ | 
|  | riscv_iommu_readq_timeout((iommu), RISCV_IOMMU_REG_DDTP, ddtp, \ | 
|  | !(ddtp & RISCV_IOMMU_DDTP_BUSY), 10, \ | 
|  | RISCV_IOMMU_DDTP_TIMEOUT); \ | 
|  | ddtp; }) | 
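|  | /* The macro evaluates to the last DDTP value read; callers detect a poll timeout by re-checking the BUSY bit. */ | 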
|  |  | 
|  | static int riscv_iommu_iodir_alloc(struct riscv_iommu_device *iommu) | 
|  | { | 
|  | u64 ddtp; | 
|  | unsigned int mode; | 
|  |  | 
|  | ddtp = riscv_iommu_read_ddtp(iommu); | 
|  | if (ddtp & RISCV_IOMMU_DDTP_BUSY) | 
|  | return -EBUSY; | 
|  |  | 
|  | /* | 
|  | * It is optional for the hardware to report a fixed address for device | 
|  | * directory root page when DDT.MODE is OFF or BARE. | 
|  | */ | 
|  | mode = FIELD_GET(RISCV_IOMMU_DDTP_IOMMU_MODE, ddtp); | 
|  | if (mode == RISCV_IOMMU_DDTP_IOMMU_MODE_BARE || | 
|  | mode == RISCV_IOMMU_DDTP_IOMMU_MODE_OFF) { | 
|  | /* Use WARL to discover hardware fixed DDT PPN */ | 
|  | riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, | 
|  | FIELD_PREP(RISCV_IOMMU_DDTP_IOMMU_MODE, mode)); | 
|  | ddtp = riscv_iommu_read_ddtp(iommu); | 
|  | if (ddtp & RISCV_IOMMU_DDTP_BUSY) | 
|  | return -EBUSY; | 
|  |  | 
|  | iommu->ddt_phys = ppn_to_phys(ddtp); | 
|  | if (iommu->ddt_phys) | 
|  | iommu->ddt_root = devm_ioremap(iommu->dev, | 
|  | iommu->ddt_phys, PAGE_SIZE); | 
|  | if (iommu->ddt_root) | 
|  | memset(iommu->ddt_root, 0, PAGE_SIZE); | 
|  | } | 
|  |  | 
|  | if (!iommu->ddt_root) { | 
|  | iommu->ddt_root = riscv_iommu_get_pages(iommu, SZ_4K); | 
|  | iommu->ddt_phys = __pa(iommu->ddt_root); | 
|  | } | 
|  |  | 
|  | if (!iommu->ddt_root) | 
|  | return -ENOMEM; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Discover supported DDT modes starting from requested value, | 
|  | * configure DDTP register with accepted mode and root DDT address. | 
|  | * Accepted iommu->ddt_mode is updated on success. | 
|  | */ | 
|  | static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu, | 
|  | unsigned int ddtp_mode) | 
|  | { | 
|  | struct device *dev = iommu->dev; | 
|  | u64 ddtp, rq_ddtp; | 
|  | unsigned int mode, rq_mode = ddtp_mode; | 
|  | struct riscv_iommu_command cmd; | 
|  |  | 
|  | ddtp = riscv_iommu_read_ddtp(iommu); | 
|  | if (ddtp & RISCV_IOMMU_DDTP_BUSY) | 
|  | return -EBUSY; | 
|  |  | 
|  | /* Disallow state transition from xLVL to xLVL. */ | 
|  | mode = FIELD_GET(RISCV_IOMMU_DDTP_IOMMU_MODE, ddtp); | 
|  | if (mode != RISCV_IOMMU_DDTP_IOMMU_MODE_BARE && | 
|  | mode != RISCV_IOMMU_DDTP_IOMMU_MODE_OFF && | 
|  | rq_mode != RISCV_IOMMU_DDTP_IOMMU_MODE_BARE && | 
|  | rq_mode != RISCV_IOMMU_DDTP_IOMMU_MODE_OFF) | 
|  | return -EINVAL; | 
|  |  | 
|  | do { | 
|  | rq_ddtp = FIELD_PREP(RISCV_IOMMU_DDTP_IOMMU_MODE, rq_mode); | 
|  | if (rq_mode > RISCV_IOMMU_DDTP_IOMMU_MODE_BARE) | 
|  | rq_ddtp |= phys_to_ppn(iommu->ddt_phys); | 
|  |  | 
|  | riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, rq_ddtp); | 
|  | ddtp = riscv_iommu_read_ddtp(iommu); | 
|  | if (ddtp & RISCV_IOMMU_DDTP_BUSY) { | 
|  | dev_err(dev, "timeout when setting ddtp (ddt mode: %u, read: %llx)\n", | 
|  | rq_mode, ddtp); | 
|  | return -EBUSY; | 
|  | } | 
|  |  | 
|  | /* Verify IOMMU hardware accepts new DDTP config. */ | 
|  | mode = FIELD_GET(RISCV_IOMMU_DDTP_IOMMU_MODE, ddtp); | 
|  |  | 
|  | if (rq_mode == mode) | 
|  | break; | 
|  |  | 
|  | /* Hardware mandatory DDTP mode has not been accepted. */ | 
|  | if (rq_mode < RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL && rq_ddtp != ddtp) { | 
|  | dev_err(dev, "DDTP update failed hw: %llx vs %llx\n", | 
|  | ddtp, rq_ddtp); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The mode field is WARL; an IOMMU may support only a subset of | 
|  | * directory table levels, in which case setting an unsupported | 
|  | * number of levels reads back either a valid xLVL or off/bare. | 
|  | * If we got off/bare, try again with a smaller xLVL. | 
|  | */ | 
|  | if (mode < RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL && | 
|  | rq_mode > RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL) { | 
|  | dev_dbg(dev, "DDTP hw mode %u vs %u\n", mode, rq_mode); | 
|  | rq_mode--; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * We tried all supported modes and the IOMMU hardware failed to | 
|  | * accept the new settings. Something went very wrong, since off/bare | 
|  | * and at least one xLVL must be supported. | 
|  | */ | 
|  | dev_err(dev, "DDTP hw mode %u, failed to set %u\n", | 
|  | mode, ddtp_mode); | 
|  | return -EINVAL; | 
|  | } while (1); | 
|  |  | 
|  | iommu->ddt_mode = mode; | 
|  | if (mode != ddtp_mode) | 
|  | dev_dbg(dev, "DDTP hw mode %u, requested %u\n", mode, ddtp_mode); | 
|  |  | 
|  | /* Invalidate device context cache */ | 
|  | riscv_iommu_cmd_iodir_inval_ddt(&cmd); | 
|  | riscv_iommu_cmd_send(iommu, &cmd); | 
|  |  | 
|  | /* Invalidate address translation cache */ | 
|  | riscv_iommu_cmd_inval_vma(&cmd); | 
|  | riscv_iommu_cmd_send(iommu, &cmd); | 
|  |  | 
|  | /* IOFENCE.C */ | 
|  | riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* This struct contains protection domain specific IOMMU driver data. */ | 
|  | struct riscv_iommu_domain { | 
|  | struct iommu_domain domain; | 
|  | struct list_head bonds; | 
|  | spinlock_t lock;		/* protect bonds list updates. */ | 
|  | int pscid; | 
|  | bool amo_enabled; | 
|  | int numa_node; | 
|  | unsigned int pgd_mode; | 
|  | unsigned long *pgd_root; | 
|  | }; | 
|  |  | 
|  | #define iommu_domain_to_riscv(iommu_domain) \ | 
|  | container_of(iommu_domain, struct riscv_iommu_domain, domain) | 
|  |  | 
|  | /* Private IOMMU data for managed devices, dev_iommu_priv_* */ | 
|  | struct riscv_iommu_info { | 
|  | struct riscv_iommu_domain *domain; | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Linkage between an iommu_domain and attached devices. | 
|  | * | 
|  | * A protection domain requiring IOATC and DevATC translation cache invalidations | 
|  | * should be linked to its attached devices using riscv_iommu_bond structures. | 
|  | * Devices should be linked to the domain before first use and unlinked after | 
|  | * the translations from the referenced protection domain can no longer be used. | 
|  | * Blocking and identity domains are not tracked here, as the IOMMU hardware | 
|  | * does not cache negative and/or identity (BARE mode) translations, and DevATC | 
|  | * is disabled for those protection domains. | 
|  | * | 
|  | * The device pointer and IOMMU data remain stable in the bond struct after | 
|  | * _probe_device() where it's attached to the managed IOMMU, up to the | 
|  | * completion of the _release_device() call. The release of the bond structure | 
|  | * is synchronized with the device release. | 
|  | */ | 
|  | struct riscv_iommu_bond { | 
|  | struct list_head list; | 
|  | struct rcu_head rcu; | 
|  | struct device *dev; | 
|  | }; | 
|  |  | 
|  | static int riscv_iommu_bond_link(struct riscv_iommu_domain *domain, | 
|  | struct device *dev) | 
|  | { | 
|  | struct riscv_iommu_device *iommu = dev_to_iommu(dev); | 
|  | struct riscv_iommu_bond *bond; | 
|  | struct list_head *bonds; | 
|  |  | 
|  | bond = kzalloc(sizeof(*bond), GFP_KERNEL); | 
|  | if (!bond) | 
|  | return -ENOMEM; | 
|  | bond->dev = dev; | 
|  |  | 
|  | /* | 
|  | * List of devices attached to the domain is arranged based on | 
|  | * managed IOMMU device. | 
|  | */ | 
|  |  | 
|  | spin_lock(&domain->lock); | 
|  | list_for_each(bonds, &domain->bonds) | 
|  | if (dev_to_iommu(list_entry(bonds, struct riscv_iommu_bond, list)->dev) == iommu) | 
|  | break; | 
|  | list_add_rcu(&bond->list, bonds); | 
|  | spin_unlock(&domain->lock); | 
|  |  | 
|  | /* Synchronize with riscv_iommu_iotlb_inval() sequence. See comment below. */ | 
|  | smp_mb(); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void riscv_iommu_bond_unlink(struct riscv_iommu_domain *domain, | 
|  | struct device *dev) | 
|  | { | 
|  | struct riscv_iommu_device *iommu = dev_to_iommu(dev); | 
|  | struct riscv_iommu_bond *bond, *found = NULL; | 
|  | struct riscv_iommu_command cmd; | 
|  | int count = 0; | 
|  |  | 
|  | if (!domain) | 
|  | return; | 
|  |  | 
|  | spin_lock(&domain->lock); | 
|  | list_for_each_entry(bond, &domain->bonds, list) { | 
|  | if (found && count) | 
|  | break; | 
|  | else if (bond->dev == dev) | 
|  | found = bond; | 
|  | else if (dev_to_iommu(bond->dev) == iommu) | 
|  | count++; | 
|  | } | 
|  | if (found) | 
|  | list_del_rcu(&found->list); | 
|  | spin_unlock(&domain->lock); | 
|  | kfree_rcu(found, rcu); | 
|  |  | 
|  | /* | 
|  | * If this was the last bond between this domain and the IOMMU | 
|  | * invalidate all cached entries for domain's PSCID. | 
|  | */ | 
|  | if (!count) { | 
|  | riscv_iommu_cmd_inval_vma(&cmd); | 
|  | riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid); | 
|  | riscv_iommu_cmd_send(iommu, &cmd); | 
|  |  | 
|  | riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Send IOTLB.INVAL for the whole address space when the range is larger than 2MB. | 
|  | * This limit will be replaced with range invalidations, if supported by | 
|  | * the hardware, once the RISC-V IOMMU architecture specification update | 
|  | * for range invalidations becomes available. | 
|  | */ | 
|  | #define RISCV_IOMMU_IOTLB_INVAL_LIMIT	(2 << 20) | 
|  |  | 
|  | static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain, | 
|  | unsigned long start, unsigned long end) | 
|  | { | 
|  | struct riscv_iommu_bond *bond; | 
|  | struct riscv_iommu_device *iommu, *prev; | 
|  | struct riscv_iommu_command cmd; | 
|  | unsigned long len = end - start + 1; | 
|  | unsigned long iova; | 
|  |  | 
|  | /* | 
|  | * For each IOMMU linked with this protection domain (via bonds->dev), | 
|  | * an IOTLB invalidation command will be submitted and executed. | 
|  | * | 
|  | * Possible race with domain attach flow is handled by sequencing | 
|  | * bond creation - riscv_iommu_bond_link(), and device directory | 
|  | * update - riscv_iommu_iodir_update(). | 
|  | * | 
|  | * PTE Update / IOTLB Inval           Device attach & directory update | 
|  | * --------------------------         -------------------------- | 
|  | * update page table entries          add dev to the bond list | 
|  | * FENCE RW,RW                        FENCE RW,RW | 
|  | * For all IOMMUs: (can be empty)     Update FSC/PSCID | 
|  | *   FENCE IOW,IOW                      FENCE IOW,IOW | 
|  | *   IOTLB.INVAL                        IODIR.INVAL | 
|  | *   IOFENCE.C | 
|  | * | 
|  | * If bond list is not updated with new device, directory context will | 
|  | * be configured with already valid page table content. If an IOMMU is | 
|  | * linked to the protection domain it will receive invalidation | 
|  | * requests for updated page table entries. | 
|  | */ | 
|  | smp_mb(); | 
|  |  | 
|  | rcu_read_lock(); | 
|  |  | 
|  | prev = NULL; | 
|  | list_for_each_entry_rcu(bond, &domain->bonds, list) { | 
|  | iommu = dev_to_iommu(bond->dev); | 
|  |  | 
|  | /* | 
|  | * An IOTLB invalidation request can be safely omitted if one was already | 
|  | * sent to this IOMMU for the same PSCID; with the domain->bonds list | 
|  | * arranged based on the device's IOMMU, it's sufficient to check the | 
|  | * last device the invalidation was sent to. | 
|  | */ | 
|  | if (iommu == prev) | 
|  | continue; | 
|  |  | 
|  | riscv_iommu_cmd_inval_vma(&cmd); | 
|  | riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid); | 
|  | if (len && len < RISCV_IOMMU_IOTLB_INVAL_LIMIT) { | 
|  | for (iova = start; iova < end; iova += PAGE_SIZE) { | 
|  | riscv_iommu_cmd_inval_set_addr(&cmd, iova); | 
|  | riscv_iommu_cmd_send(iommu, &cmd); | 
|  | } | 
|  | } else { | 
|  | riscv_iommu_cmd_send(iommu, &cmd); | 
|  | } | 
|  | prev = iommu; | 
|  | } | 
|  |  | 
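|  | /* Second pass: issue a single IOFENCE.C per distinct IOMMU and wait for the invalidations to complete. */ | 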
|  | prev = NULL; | 
|  | list_for_each_entry_rcu(bond, &domain->bonds, list) { | 
|  | iommu = dev_to_iommu(bond->dev); | 
|  | if (iommu == prev) | 
|  | continue; | 
|  |  | 
|  | riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT); | 
|  | prev = iommu; | 
|  | } | 
|  | rcu_read_unlock(); | 
|  | } | 
|  |  | 
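|  | /* FSC MODE=0 (Bare): no first-stage translation; used when attaching the blocking and identity domains. */ | 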
|  | #define RISCV_IOMMU_FSC_BARE 0 | 
|  |  | 
|  | /* | 
|  | * Update IODIR for the device. | 
|  | * | 
|  | * During the execution of riscv_iommu_probe_device(), IODIR entries are | 
|  | * allocated for the device's identifiers.  Device context invalidation | 
|  | * becomes necessary only if one of the updated entries was previously | 
|  | * marked as valid, given that invalid device context entries are not | 
|  | * cached by the IOMMU hardware. | 
|  | * In this implementation, updating a valid device context while the | 
|  | * device is not quiesced might be disruptive, potentially causing | 
|  | * interim translation faults. | 
|  | */ | 
|  | static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu, | 
|  | struct device *dev, u64 fsc, u64 ta) | 
|  | { | 
|  | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); | 
|  | struct riscv_iommu_dc *dc; | 
|  | struct riscv_iommu_command cmd; | 
|  | bool sync_required = false; | 
|  | u64 tc; | 
|  | int i; | 
|  |  | 
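|  | /* | 
|  | * First pass: clear TC.V on any currently valid entry and invalidate | 
|  | * its cached copy before the FSC/TA fields are rewritten below. | 
|  | */ | 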
|  | for (i = 0; i < fwspec->num_ids; i++) { | 
|  | dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]); | 
|  | tc = READ_ONCE(dc->tc); | 
|  | if (!(tc & RISCV_IOMMU_DC_TC_V)) | 
|  | continue; | 
|  |  | 
|  | WRITE_ONCE(dc->tc, tc & ~RISCV_IOMMU_DC_TC_V); | 
|  |  | 
|  | /* Invalidate device context cached values */ | 
|  | riscv_iommu_cmd_iodir_inval_ddt(&cmd); | 
|  | riscv_iommu_cmd_iodir_set_did(&cmd, fwspec->ids[i]); | 
|  | riscv_iommu_cmd_send(iommu, &cmd); | 
|  | sync_required = true; | 
|  | } | 
|  |  | 
|  | if (sync_required) | 
|  | riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT); | 
|  |  | 
|  | /* | 
|  | * For device context with DC_TC_PDTV = 0, translation attributes valid bit | 
|  | * is stored as DC_TC_V bit (both sharing the same location at BIT(0)). | 
|  | */ | 
|  | for (i = 0; i < fwspec->num_ids; i++) { | 
|  | dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]); | 
|  | tc = READ_ONCE(dc->tc); | 
|  | tc |= ta & RISCV_IOMMU_DC_TC_V; | 
|  |  | 
|  | WRITE_ONCE(dc->fsc, fsc); | 
|  | WRITE_ONCE(dc->ta, ta & RISCV_IOMMU_PC_TA_PSCID); | 
|  | /* Update device context, write TC.V as the last step. */ | 
|  | dma_wmb(); | 
|  | WRITE_ONCE(dc->tc, tc); | 
|  |  | 
|  | /* Invalidate device context after update */ | 
|  | riscv_iommu_cmd_iodir_inval_ddt(&cmd); | 
|  | riscv_iommu_cmd_iodir_set_did(&cmd, fwspec->ids[i]); | 
|  | riscv_iommu_cmd_send(iommu, &cmd); | 
|  | } | 
|  |  | 
|  | riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * IOVA page translation tree management. | 
|  | */ | 
|  |  | 
|  | static void riscv_iommu_iotlb_flush_all(struct iommu_domain *iommu_domain) | 
|  | { | 
|  | struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); | 
|  |  | 
|  | riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX); | 
|  | } | 
|  |  | 
|  | static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain, | 
|  | struct iommu_iotlb_gather *gather) | 
|  | { | 
|  | struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); | 
|  |  | 
|  | riscv_iommu_iotlb_inval(domain, gather->start, gather->end); | 
|  | } | 
|  |  | 
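|  | /* IOVA bits consumed per page-table level: with 4K pages and 64-bit PTEs this is 9 (512 entries per table). */ | 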
|  | #define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t))) | 
|  |  | 
|  | #define _io_pte_present(pte)	((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) | 
|  | #define _io_pte_leaf(pte)	((pte) & _PAGE_LEAF) | 
|  | #define _io_pte_none(pte)	((pte) == 0) | 
|  | #define _io_pte_entry(pn, prot)	((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot)) | 
|  |  | 
|  | static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain, | 
|  | unsigned long pte, | 
|  | struct iommu_pages_list *freelist) | 
|  | { | 
|  | unsigned long *ptr; | 
|  | int i; | 
|  |  | 
|  | if (!_io_pte_present(pte) || _io_pte_leaf(pte)) | 
|  | return; | 
|  |  | 
|  | ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte)); | 
|  |  | 
|  | /* Recursively free all sub page table pages */ | 
|  | for (i = 0; i < PTRS_PER_PTE; i++) { | 
|  | pte = READ_ONCE(ptr[i]); | 
|  | if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte) | 
|  | riscv_iommu_pte_free(domain, pte, freelist); | 
|  | } | 
|  |  | 
|  | if (freelist) | 
|  | iommu_pages_list_add(freelist, ptr); | 
|  | else | 
|  | iommu_free_pages(ptr); | 
|  | } | 
|  |  | 
|  | static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain, | 
|  | unsigned long iova, size_t pgsize, | 
|  | gfp_t gfp) | 
|  | { | 
|  | unsigned long *ptr = domain->pgd_root; | 
|  | unsigned long pte, old; | 
|  | int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2; | 
|  | void *addr; | 
|  |  | 
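|  | /* | 
|  | * Walk from the root: level is 2 for Sv39, 3 for Sv48 and 4 for Sv57, | 
|  | * counting down to 0 for the 4K leaf level. | 
|  | */ | 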
|  | do { | 
|  | const int shift = PAGE_SHIFT + PT_SHIFT * level; | 
|  |  | 
|  | ptr += ((iova >> shift) & (PTRS_PER_PTE - 1)); | 
|  | /* | 
|  | * Note: returned entry might be a non-leaf if there was | 
|  | * existing mapping with smaller granularity. Up to the caller | 
|  | * to replace and invalidate. | 
|  | */ | 
|  | if (((size_t)1 << shift) == pgsize) | 
|  | return ptr; | 
|  | pte_retry: | 
|  | pte = READ_ONCE(*ptr); | 
|  | /* | 
|  | * This is very likely incorrect as we should not be adding | 
|  | * new mapping with smaller granularity on top | 
|  | * of existing 2M/1G mapping. Fail. | 
|  | */ | 
|  | if (_io_pte_present(pte) && _io_pte_leaf(pte)) | 
|  | return NULL; | 
|  | /* | 
|  | * Non-leaf entry is missing, allocate and try to add to the | 
|  | * page table. This might race with other mappings, retry. | 
|  | */ | 
|  | if (_io_pte_none(pte)) { | 
|  | addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp, | 
|  | SZ_4K); | 
|  | if (!addr) | 
|  | return NULL; | 
|  | old = pte; | 
|  | pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE); | 
|  | if (cmpxchg_relaxed(ptr, old, pte) != old) { | 
|  | iommu_free_pages(addr); | 
|  | goto pte_retry; | 
|  | } | 
|  | } | 
|  | ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte)); | 
|  | } while (level-- > 0); | 
|  |  | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain, | 
|  | unsigned long iova, size_t *pte_pgsize) | 
|  | { | 
|  | unsigned long *ptr = domain->pgd_root; | 
|  | unsigned long pte; | 
|  | int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2; | 
|  |  | 
|  | do { | 
|  | const int shift = PAGE_SHIFT + PT_SHIFT * level; | 
|  |  | 
|  | ptr += ((iova >> shift) & (PTRS_PER_PTE - 1)); | 
|  | pte = READ_ONCE(*ptr); | 
|  | if (_io_pte_present(pte) && _io_pte_leaf(pte)) { | 
|  | *pte_pgsize = (size_t)1 << shift; | 
|  | return ptr; | 
|  | } | 
|  | if (_io_pte_none(pte)) | 
|  | return NULL; | 
|  | ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte)); | 
|  | } while (level-- > 0); | 
|  |  | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain, | 
|  | unsigned long iova, phys_addr_t phys, | 
|  | size_t pgsize, size_t pgcount, int prot, | 
|  | gfp_t gfp, size_t *mapped) | 
|  | { | 
|  | struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); | 
|  | size_t size = 0; | 
|  | unsigned long *ptr; | 
|  | unsigned long pte, old, pte_prot; | 
|  | int rc = 0; | 
|  | struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist); | 
|  |  | 
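|  | /* | 
|  | * Read-only mappings never get _PAGE_WRITE. For writable mappings the | 
|  | * DIRTY bit is preset unless the IOMMU can update A/D bits in hardware | 
|  | * (AMO_HWAD), avoiding write faults on clean PTEs when hardware updates | 
|  | * are not available. | 
|  | */ | 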
|  | if (!(prot & IOMMU_WRITE)) | 
|  | pte_prot = _PAGE_BASE | _PAGE_READ; | 
|  | else if (domain->amo_enabled) | 
|  | pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE; | 
|  | else | 
|  | pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY; | 
|  |  | 
|  | while (pgcount) { | 
|  | ptr = riscv_iommu_pte_alloc(domain, iova, pgsize, gfp); | 
|  | if (!ptr) { | 
|  | rc = -ENOMEM; | 
|  | break; | 
|  | } | 
|  |  | 
|  | old = READ_ONCE(*ptr); | 
|  | pte = _io_pte_entry(phys_to_pfn(phys), pte_prot); | 
|  | if (cmpxchg_relaxed(ptr, old, pte) != old) | 
|  | continue; | 
|  |  | 
|  | riscv_iommu_pte_free(domain, old, &freelist); | 
|  |  | 
|  | size += pgsize; | 
|  | iova += pgsize; | 
|  | phys += pgsize; | 
|  | --pgcount; | 
|  | } | 
|  |  | 
|  | *mapped = size; | 
|  |  | 
|  | if (!iommu_pages_list_empty(&freelist)) { | 
|  | /* | 
|  | * In 1.0 spec version, the smallest scope we can use to | 
|  | * invalidate all levels of page table (i.e. leaf and non-leaf) | 
|  | * is an invalidate-all-PSCID IOTINVAL.VMA with AV=0. | 
|  | * This will be updated with hardware support for | 
|  | * capability.NL (non-leaf) IOTINVAL command. | 
|  | */ | 
|  | riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX); | 
|  | iommu_put_pages_list(&freelist); | 
|  | } | 
|  |  | 
|  | return rc; | 
|  | } | 
|  |  | 
|  | static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain, | 
|  | unsigned long iova, size_t pgsize, | 
|  | size_t pgcount, | 
|  | struct iommu_iotlb_gather *gather) | 
|  | { | 
|  | struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); | 
|  | size_t size = pgcount << __ffs(pgsize); | 
|  | unsigned long *ptr, old; | 
|  | size_t unmapped = 0; | 
|  | size_t pte_size; | 
|  |  | 
|  | while (unmapped < size) { | 
|  | ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size); | 
|  | if (!ptr) | 
|  | return unmapped; | 
|  |  | 
|  | /* partial unmap is not allowed, fail. */ | 
|  | if (iova & (pte_size - 1)) | 
|  | return unmapped; | 
|  |  | 
|  | old = READ_ONCE(*ptr); | 
|  | if (cmpxchg_relaxed(ptr, old, 0) != old) | 
|  | continue; | 
|  |  | 
|  | iommu_iotlb_gather_add_page(&domain->domain, gather, iova, | 
|  | pte_size); | 
|  |  | 
|  | iova += pte_size; | 
|  | unmapped += pte_size; | 
|  | } | 
|  |  | 
|  | return unmapped; | 
|  | } | 
|  |  | 
|  | static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain, | 
|  | dma_addr_t iova) | 
|  | { | 
|  | struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); | 
|  | size_t pte_size; | 
|  | unsigned long *ptr; | 
|  |  | 
|  | ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size); | 
|  | if (!ptr) | 
|  | return 0; | 
|  |  | 
|  | return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1)); | 
|  | } | 
|  |  | 
|  | static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain) | 
|  | { | 
|  | struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); | 
|  | const unsigned long pfn = virt_to_pfn(domain->pgd_root); | 
|  |  | 
|  | WARN_ON(!list_empty(&domain->bonds)); | 
|  |  | 
|  | if ((int)domain->pscid > 0) | 
|  | ida_free(&riscv_iommu_pscids, domain->pscid); | 
|  |  | 
|  | riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL); | 
|  | kfree(domain); | 
|  | } | 
|  |  | 
|  | static bool riscv_iommu_pt_supported(struct riscv_iommu_device *iommu, int pgd_mode) | 
|  | { | 
|  | switch (pgd_mode) { | 
|  | case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: | 
|  | return iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39; | 
|  |  | 
|  | case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: | 
|  | return iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48; | 
|  |  | 
|  | case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: | 
|  | return iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain, | 
|  | struct device *dev) | 
|  | { | 
|  | struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); | 
|  | struct riscv_iommu_device *iommu = dev_to_iommu(dev); | 
|  | struct riscv_iommu_info *info = dev_iommu_priv_get(dev); | 
|  | u64 fsc, ta; | 
|  |  | 
|  | if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode)) | 
|  | return -ENODEV; | 
|  |  | 
|  | fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) | | 
|  | FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root)); | 
|  | ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) | | 
|  | RISCV_IOMMU_PC_TA_V; | 
|  |  | 
|  | if (riscv_iommu_bond_link(domain, dev)) | 
|  | return -ENOMEM; | 
|  |  | 
|  | riscv_iommu_iodir_update(iommu, dev, fsc, ta); | 
|  | riscv_iommu_bond_unlink(info->domain, dev); | 
|  | info->domain = domain; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static const struct iommu_domain_ops riscv_iommu_paging_domain_ops = { | 
|  | .attach_dev = riscv_iommu_attach_paging_domain, | 
|  | .free = riscv_iommu_free_paging_domain, | 
|  | .map_pages = riscv_iommu_map_pages, | 
|  | .unmap_pages = riscv_iommu_unmap_pages, | 
|  | .iova_to_phys = riscv_iommu_iova_to_phys, | 
|  | .iotlb_sync = riscv_iommu_iotlb_sync, | 
|  | .flush_iotlb_all = riscv_iommu_iotlb_flush_all, | 
|  | }; | 
|  |  | 
|  | static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev) | 
|  | { | 
|  | struct riscv_iommu_domain *domain; | 
|  | struct riscv_iommu_device *iommu; | 
|  | unsigned int pgd_mode; | 
|  | dma_addr_t va_mask; | 
|  | int va_bits; | 
|  |  | 
|  | iommu = dev_to_iommu(dev); | 
|  | if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) { | 
|  | pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57; | 
|  | va_bits = 57; | 
|  | } else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48) { | 
|  | pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48; | 
|  | va_bits = 48; | 
|  | } else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39) { | 
|  | pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39; | 
|  | va_bits = 39; | 
|  | } else { | 
|  | dev_err(dev, "cannot find supported page table mode\n"); | 
|  | return ERR_PTR(-ENODEV); | 
|  | } | 
|  |  | 
|  | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | 
|  | if (!domain) | 
|  | return ERR_PTR(-ENOMEM); | 
|  |  | 
|  | INIT_LIST_HEAD_RCU(&domain->bonds); | 
|  | spin_lock_init(&domain->lock); | 
|  | domain->numa_node = dev_to_node(iommu->dev); | 
|  | domain->amo_enabled = !!(iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD); | 
|  | domain->pgd_mode = pgd_mode; | 
|  | domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node, | 
|  | GFP_KERNEL_ACCOUNT, SZ_4K); | 
|  | if (!domain->pgd_root) { | 
|  | kfree(domain); | 
|  | return ERR_PTR(-ENOMEM); | 
|  | } | 
|  |  | 
|  | domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1, | 
|  | RISCV_IOMMU_MAX_PSCID, GFP_KERNEL); | 
|  | if (domain->pscid < 0) { | 
|  | iommu_free_pages(domain->pgd_root); | 
|  | kfree(domain); | 
|  | return ERR_PTR(-ENOMEM); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Note: the RISC-V Privileged spec mandates that virtual addresses | 
|  | * need to be sign-extended, so if (VA_BITS - 1) is set, all | 
|  | * bits >= VA_BITS need to also be set or else we'll get a | 
|  | * page fault. However the code that creates the mappings | 
|  | * above us (e.g. iommu_dma_alloc_iova()) won't do that for us | 
|  | * for now, so we'll end up with invalid virtual addresses | 
|  | * to map. As a workaround, until we get this sorted out, | 
|  | * limit the available virtual addresses to VA_BITS - 1. | 
|  | */ | 
|  | va_mask = DMA_BIT_MASK(va_bits - 1); | 
|  |  | 
|  | domain->domain.geometry.aperture_start = 0; | 
|  | domain->domain.geometry.aperture_end = va_mask; | 
|  | domain->domain.geometry.force_aperture = true; | 
|  | domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G); | 
|  |  | 
|  | domain->domain.ops = &riscv_iommu_paging_domain_ops; | 
|  |  | 
|  | return &domain->domain; | 
|  | } | 
|  |  | 
|  | static int riscv_iommu_attach_blocking_domain(struct iommu_domain *iommu_domain, | 
|  | struct device *dev) | 
|  | { | 
|  | struct riscv_iommu_device *iommu = dev_to_iommu(dev); | 
|  | struct riscv_iommu_info *info = dev_iommu_priv_get(dev); | 
|  |  | 
|  | /* Make device context invalid, translation requests will fault w/ #258 */ | 
|  | riscv_iommu_iodir_update(iommu, dev, RISCV_IOMMU_FSC_BARE, 0); | 
|  | riscv_iommu_bond_unlink(info->domain, dev); | 
|  | info->domain = NULL; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static struct iommu_domain riscv_iommu_blocking_domain = { | 
|  | .type = IOMMU_DOMAIN_BLOCKED, | 
|  | .ops = &(const struct iommu_domain_ops) { | 
|  | .attach_dev = riscv_iommu_attach_blocking_domain, | 
|  | } | 
|  | }; | 
|  |  | 
|  | static int riscv_iommu_attach_identity_domain(struct iommu_domain *iommu_domain, | 
|  | struct device *dev) | 
|  | { | 
|  | struct riscv_iommu_device *iommu = dev_to_iommu(dev); | 
|  | struct riscv_iommu_info *info = dev_iommu_priv_get(dev); | 
|  |  | 
|  | riscv_iommu_iodir_update(iommu, dev, RISCV_IOMMU_FSC_BARE, RISCV_IOMMU_PC_TA_V); | 
|  | riscv_iommu_bond_unlink(info->domain, dev); | 
|  | info->domain = NULL; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static struct iommu_domain riscv_iommu_identity_domain = { | 
|  | .type = IOMMU_DOMAIN_IDENTITY, | 
|  | .ops = &(const struct iommu_domain_ops) { | 
|  | .attach_dev = riscv_iommu_attach_identity_domain, | 
|  | } | 
|  | }; | 
|  |  | 
|  | static struct iommu_group *riscv_iommu_device_group(struct device *dev) | 
|  | { | 
|  | if (dev_is_pci(dev)) | 
|  | return pci_device_group(dev); | 
|  | return generic_device_group(dev); | 
|  | } | 
|  |  | 
|  | static int riscv_iommu_of_xlate(struct device *dev, const struct of_phandle_args *args) | 
|  | { | 
|  | return iommu_fwspec_add_ids(dev, args->args, 1); | 
|  | } | 
|  |  | 
|  | static struct iommu_device *riscv_iommu_probe_device(struct device *dev) | 
|  | { | 
|  | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); | 
|  | struct riscv_iommu_device *iommu; | 
|  | struct riscv_iommu_info *info; | 
|  | struct riscv_iommu_dc *dc; | 
|  | u64 tc; | 
|  | int i; | 
|  |  | 
|  | if (!fwspec || !fwspec->iommu_fwnode->dev || !fwspec->num_ids) | 
|  | return ERR_PTR(-ENODEV); | 
|  |  | 
|  | iommu = dev_get_drvdata(fwspec->iommu_fwnode->dev); | 
|  | if (!iommu) | 
|  | return ERR_PTR(-ENODEV); | 
|  |  | 
|  | /* | 
|  | * IOMMU hardware operating in fail-over BARE mode will provide | 
|  | * identity translation for all connected devices anyway... | 
|  | */ | 
|  | if (iommu->ddt_mode <= RISCV_IOMMU_DDTP_IOMMU_MODE_BARE) | 
|  | return ERR_PTR(-ENODEV); | 
|  |  | 
|  | info = kzalloc(sizeof(*info), GFP_KERNEL); | 
|  | if (!info) | 
|  | return ERR_PTR(-ENOMEM); | 
|  | /* | 
|  | * Allocate and pre-configure device context entries in | 
|  | * the device directory. Do not mark the context valid yet. | 
|  | */ | 
|  | tc = 0; | 
|  | if (iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD) | 
|  | tc |= RISCV_IOMMU_DC_TC_SADE; | 
|  | for (i = 0; i < fwspec->num_ids; i++) { | 
|  | dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]); | 
|  | if (!dc) { | 
|  | kfree(info); | 
|  | return ERR_PTR(-ENODEV); | 
|  | } | 
|  | if (READ_ONCE(dc->tc) & RISCV_IOMMU_DC_TC_V) | 
|  | dev_warn(dev, "already attached to IOMMU device directory\n"); | 
|  | WRITE_ONCE(dc->tc, tc); | 
|  | } | 
|  |  | 
|  | dev_iommu_priv_set(dev, info); | 
|  |  | 
|  | return &iommu->iommu; | 
|  | } | 
|  |  | 
|  | static void riscv_iommu_release_device(struct device *dev) | 
|  | { | 
|  | struct riscv_iommu_info *info = dev_iommu_priv_get(dev); | 
|  |  | 
|  | kfree_rcu_mightsleep(info); | 
|  | } | 
|  |  | 
|  | static const struct iommu_ops riscv_iommu_ops = { | 
|  | .of_xlate = riscv_iommu_of_xlate, | 
|  | .identity_domain = &riscv_iommu_identity_domain, | 
|  | .blocked_domain = &riscv_iommu_blocking_domain, | 
|  | .release_domain = &riscv_iommu_blocking_domain, | 
|  | .domain_alloc_paging = riscv_iommu_alloc_paging_domain, | 
|  | .device_group = riscv_iommu_device_group, | 
|  | .probe_device = riscv_iommu_probe_device, | 
|  | .release_device	= riscv_iommu_release_device, | 
|  | }; | 
|  |  | 
|  | static int riscv_iommu_init_check(struct riscv_iommu_device *iommu) | 
|  | { | 
|  | u64 ddtp; | 
|  |  | 
|  | /* | 
|  | * Make sure the IOMMU is switched off or in pass-through mode during | 
|  | * regular boot flow and disable translation when we boot into a kexec | 
|  | * kernel where the previous kernel left it enabled. | 
|  | */ | 
|  | ddtp = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_DDTP); | 
|  | if (ddtp & RISCV_IOMMU_DDTP_BUSY) | 
|  | return -EBUSY; | 
|  |  | 
|  | if (FIELD_GET(RISCV_IOMMU_DDTP_IOMMU_MODE, ddtp) > | 
|  | RISCV_IOMMU_DDTP_IOMMU_MODE_BARE) { | 
|  | if (!is_kdump_kernel()) | 
|  | return -EBUSY; | 
|  | riscv_iommu_disable(iommu); | 
|  | } | 
|  |  | 
|  | /* Configure accesses to in-memory data structures for CPU-native byte order. */ | 
|  | if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) != | 
|  | !!(iommu->fctl & RISCV_IOMMU_FCTL_BE)) { | 
|  | if (!(iommu->caps & RISCV_IOMMU_CAPABILITIES_END)) | 
|  | return -EINVAL; | 
|  | riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, | 
|  | iommu->fctl ^ RISCV_IOMMU_FCTL_BE); | 
|  | iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL); | 
|  | if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) != | 
|  | !!(iommu->fctl & RISCV_IOMMU_FCTL_BE)) | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Distribute interrupt vectors, always use first vector for CIV. | 
|  | * At least one interrupt is required. Read back and verify. | 
|  | */ | 
|  | if (!iommu->irqs_count) | 
|  | return -EINVAL; | 
|  |  | 
|  | iommu->icvec = FIELD_PREP(RISCV_IOMMU_ICVEC_FIV, 1 % iommu->irqs_count) | | 
|  | FIELD_PREP(RISCV_IOMMU_ICVEC_PIV, 2 % iommu->irqs_count) | | 
|  | FIELD_PREP(RISCV_IOMMU_ICVEC_PMIV, 3 % iommu->irqs_count); | 
|  | riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_ICVEC, iommu->icvec); | 
|  | iommu->icvec = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_ICVEC); | 
|  | if (max(max(FIELD_GET(RISCV_IOMMU_ICVEC_CIV, iommu->icvec), | 
|  | FIELD_GET(RISCV_IOMMU_ICVEC_FIV, iommu->icvec)), | 
|  | max(FIELD_GET(RISCV_IOMMU_ICVEC_PIV, iommu->icvec), | 
|  | FIELD_GET(RISCV_IOMMU_ICVEC_PMIV, iommu->icvec))) >= iommu->irqs_count) | 
|  | return -EINVAL; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | void riscv_iommu_remove(struct riscv_iommu_device *iommu) | 
|  | { | 
|  | iommu_device_unregister(&iommu->iommu); | 
|  | iommu_device_sysfs_remove(&iommu->iommu); | 
|  | riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_IOMMU_MODE_OFF); | 
|  | riscv_iommu_queue_disable(&iommu->cmdq); | 
|  | riscv_iommu_queue_disable(&iommu->fltq); | 
|  | } | 
|  |  | 
|  | int riscv_iommu_init(struct riscv_iommu_device *iommu) | 
|  | { | 
|  | int rc; | 
|  |  | 
|  | RISCV_IOMMU_QUEUE_INIT(&iommu->cmdq, CQ); | 
|  | RISCV_IOMMU_QUEUE_INIT(&iommu->fltq, FQ); | 
|  |  | 
|  | rc = riscv_iommu_init_check(iommu); | 
|  | if (rc) | 
|  | return dev_err_probe(iommu->dev, rc, "unexpected device state\n"); | 
|  |  | 
|  | rc = riscv_iommu_iodir_alloc(iommu); | 
|  | if (rc) | 
|  | return rc; | 
|  |  | 
|  | rc = riscv_iommu_queue_alloc(iommu, &iommu->cmdq, | 
|  | sizeof(struct riscv_iommu_command)); | 
|  | if (rc) | 
|  | return rc; | 
|  |  | 
|  | rc = riscv_iommu_queue_alloc(iommu, &iommu->fltq, | 
|  | sizeof(struct riscv_iommu_fq_record)); | 
|  | if (rc) | 
|  | return rc; | 
|  |  | 
|  | rc = riscv_iommu_queue_enable(iommu, &iommu->cmdq, riscv_iommu_cmdq_process); | 
|  | if (rc) | 
|  | return rc; | 
|  |  | 
|  | rc = riscv_iommu_queue_enable(iommu, &iommu->fltq, riscv_iommu_fltq_process); | 
|  | if (rc) | 
|  | goto err_queue_disable; | 
|  |  | 
|  | rc = riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_IOMMU_MODE_MAX); | 
|  | if (rc) | 
|  | goto err_queue_disable; | 
|  |  | 
|  | rc = iommu_device_sysfs_add(&iommu->iommu, NULL, NULL, "riscv-iommu@%s", | 
|  | dev_name(iommu->dev)); | 
|  | if (rc) { | 
|  | dev_err_probe(iommu->dev, rc, "cannot register sysfs interface\n"); | 
|  | goto err_iodir_off; | 
|  | } | 
|  |  | 
|  | rc = iommu_device_register(&iommu->iommu, &riscv_iommu_ops, iommu->dev); | 
|  | if (rc) { | 
|  | dev_err_probe(iommu->dev, rc, "cannot register iommu interface\n"); | 
|  | goto err_remove_sysfs; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  |  | 
|  | err_remove_sysfs: | 
|  | iommu_device_sysfs_remove(&iommu->iommu); | 
|  | err_iodir_off: | 
|  | riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_IOMMU_MODE_OFF); | 
|  | err_queue_disable: | 
|  | riscv_iommu_queue_disable(&iommu->fltq); | 
|  | riscv_iommu_queue_disable(&iommu->cmdq); | 
|  | return rc; | 
|  | } |