// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2025 Arm Ltd.
#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
#include <linux/arm_mpam.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/limits.h>
#include <linux/list.h>
#include <linux/math.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/resctrl.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <asm/mpam.h>
#include "mpam_internal.h"
DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters);
/*
 * The classes we've picked to map to resctrl resources, each wrapped
 * up with its resctrl structure.
* Class pointer may be NULL.
*/
static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES];
#define for_each_mpam_resctrl_control(res, rid) \
for (rid = 0, res = &mpam_resctrl_controls[rid]; \
rid < RDT_NUM_RESOURCES; \
rid++, res = &mpam_resctrl_controls[rid])
/*
* The classes we've picked to map to resctrl events.
 * Resctrl believes all the world's a Xeon, and these are all on the L3. This
 * array lets us find the actual class backing the event counters, e.g.
 * the only memory bandwidth counters may be on the memory controller, but to
* make use of them, we pretend they are on L3. Restrict the events considered
* to those supported by MPAM.
* Class pointer may be NULL.
*/
#define MPAM_MAX_EVENT QOS_L3_MBM_TOTAL_EVENT_ID
static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1];
#define for_each_mpam_resctrl_mon(mon, eventid) \
for (eventid = QOS_FIRST_EVENT, mon = &mpam_resctrl_counters[eventid]; \
eventid <= MPAM_MAX_EVENT; \
eventid++, mon = &mpam_resctrl_counters[eventid])
/* The lock for modifying resctrl's domain lists from cpuhp callbacks. */
static DEFINE_MUTEX(domain_list_lock);
/*
 * MPAM emulates CDP by setting different PARTIDs in the I/D fields of
 * MPAM0_EL1. This applies globally to all traffic the CPU generates.
*/
static bool cdp_enabled;
/*
* We use cacheinfo to discover the size of the caches and their id. cacheinfo
* populates this from a device_initcall(). mpam_resctrl_setup() must wait.
*/
static bool cacheinfo_ready;
static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready);
/*
* If resctrl_init() succeeded, resctrl_exit() can be used to remove support
* for the filesystem in the event of an error.
*/
static bool resctrl_enabled;
bool resctrl_arch_alloc_capable(void)
{
struct mpam_resctrl_res *res;
enum resctrl_res_level rid;
for_each_mpam_resctrl_control(res, rid) {
if (res->resctrl_res.alloc_capable)
return true;
}
return false;
}
bool resctrl_arch_mon_capable(void)
{
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
struct rdt_resource *l3 = &res->resctrl_res;
/* All monitors are presented as being on the L3 cache */
return l3->mon_capable;
}
bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
{
return false;
}
void resctrl_arch_mon_event_config_read(void *info)
{
}
void resctrl_arch_mon_event_config_write(void *info)
{
}
void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
{
}
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
u32 closid, u32 rmid, enum resctrl_event_id eventid)
{
}
void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
u32 closid, u32 rmid, int cntr_id,
enum resctrl_event_id eventid)
{
}
void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
enum resctrl_event_id evtid, u32 rmid, u32 closid,
u32 cntr_id, bool assign)
{
}
int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
u32 unused, u32 rmid, int cntr_id,
enum resctrl_event_id eventid, u64 *val)
{
return -EOPNOTSUPP;
}
bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r)
{
return false;
}
int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable)
{
return -EINVAL;
}
int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable)
{
return -EOPNOTSUPP;
}
bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r)
{
return false;
}
void resctrl_arch_pre_mount(void)
{
}
bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
{
return mpam_resctrl_controls[rid].cdp_enabled;
}
/**
* resctrl_reset_task_closids() - Reset the PARTID/PMG values for all tasks.
*
* At boot, all existing tasks use partid zero for D and I.
* To enable/disable CDP emulation, all these tasks need relabelling.
*/
static void resctrl_reset_task_closids(void)
{
struct task_struct *p, *t;
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
resctrl_arch_set_closid_rmid(t, RESCTRL_RESERVED_CLOSID,
RESCTRL_RESERVED_RMID);
}
read_unlock(&tasklist_lock);
}
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable)
{
u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID;
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
struct rdt_resource *l3 = &res->resctrl_res;
int cpu;
if (!IS_ENABLED(CONFIG_EXPERT) && enable) {
/*
* If the resctrl fs is mounted more than once, sequentially,
* then CDP can lead to the use of out of range PARTIDs.
*/
pr_warn("CDP not supported\n");
return -EOPNOTSUPP;
}
if (enable)
pr_warn("CDP is an expert feature and may cause MPAM to malfunction.\n");
/*
* resctrl_arch_set_cdp_enabled() is only called with enable set to
* false on error and unmount.
*/
cdp_enabled = enable;
mpam_resctrl_controls[rid].cdp_enabled = enable;
if (enable)
l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx() / 2;
else
l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();
/* The mbw_max feature can't hide cdp as it's a per-partid maximum. */
if (cdp_enabled && !mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled)
mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = false;
if (mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled &&
mpam_resctrl_controls[RDT_RESOURCE_MBA].class)
mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = true;
if (enable) {
if (mpam_partid_max < 1)
return -EINVAL;
partid_d = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_DATA);
partid_i = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_CODE);
}
mpam_set_task_partid_pmg(current, partid_d, partid_i, 0, 0);
WRITE_ONCE(arm64_mpam_global_default, mpam_get_regval(current));
resctrl_reset_task_closids();
for_each_possible_cpu(cpu)
mpam_set_cpu_defaults(cpu, partid_d, partid_i, 0, 0);
on_each_cpu(resctrl_arch_sync_cpu_closid_rmid, NULL, 1);
return 0;
}
static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid)
{
return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid);
}
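/*
 * e.g. CDP enabled on L3 but not supported by MBA: MBA's configuration is
 * cloned across the code and data partids on write, and read back from
 * either one. See resctrl_arch_update_one() and resctrl_arch_get_config().
 */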
/*
 * An MSC may raise an error interrupt if it sees an out-of-range partid/pmg,
 * and go on to truncate the value. Regardless of what the hardware supports,
 * only the system-wide safe value can be used.
*/
u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored)
{
return mpam_partid_max + 1;
}
u32 resctrl_arch_system_num_rmid_idx(void)
{
return (mpam_pmg_max + 1) * (mpam_partid_max + 1);
}
u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
{
return closid * (mpam_pmg_max + 1) + rmid;
}
void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
{
*closid = idx / (mpam_pmg_max + 1);
*rmid = idx % (mpam_pmg_max + 1);
}
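/*
 * Worked example with illustrative values: if mpam_pmg_max = 1, each PARTID
 * has two PMG values, so closid 5/rmid 1 encodes to idx = 5 * 2 + 1 = 11,
 * and decoding 11 recovers closid 5 and rmid 1. The idx space is contiguous,
 * which is what resctrl's allocator expects.
 */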
void resctrl_arch_sched_in(struct task_struct *tsk)
{
lockdep_assert_preemption_disabled();
mpam_thread_switch(tsk);
}
void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid)
{
WARN_ON_ONCE(closid > U16_MAX);
WARN_ON_ONCE(rmid > U8_MAX);
if (!cdp_enabled) {
mpam_set_cpu_defaults(cpu, closid, closid, rmid, rmid);
} else {
/*
* When CDP is enabled, resctrl halves the closid range and we
* use odd/even partid for one closid.
*/
u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
mpam_set_cpu_defaults(cpu, partid_d, partid_i, rmid, rmid);
}
}
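/*
 * Illustrative example, assuming resctrl's usual even/odd encoding in
 * resctrl_get_config_index(): with CDP enabled, closid 3 maps to
 * partid_d = 6 and partid_i = 7, which is why CDP halves the usable
 * closid space.
 */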
void resctrl_arch_sync_cpu_closid_rmid(void *info)
{
struct resctrl_cpu_defaults *r = info;
lockdep_assert_preemption_disabled();
if (r) {
resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(),
r->closid, r->rmid);
}
resctrl_arch_sched_in(current);
}
void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
{
WARN_ON_ONCE(closid > U16_MAX);
WARN_ON_ONCE(rmid > U8_MAX);
if (!cdp_enabled) {
mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid);
} else {
u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid);
}
}
bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
{
u64 regval = mpam_get_regval(tsk);
u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
if (cdp_enabled)
tsk_closid >>= 1;
return tsk_closid == closid;
}
/* The task's pmg is not unique, the partid must be considered too */
bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
{
u64 regval = mpam_get_regval(tsk);
u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
u32 tsk_rmid = FIELD_GET(MPAM0_EL1_PMG_D, regval);
if (cdp_enabled)
tsk_closid >>= 1;
return (tsk_closid == closid) && (tsk_rmid == rmid);
}
struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
{
if (l >= RDT_NUM_RESOURCES)
return NULL;
return &mpam_resctrl_controls[l].resctrl_res;
}
static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid)
{
struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];
if (!mpam_is_enabled())
return -EINVAL;
if (!mon->class)
return -EINVAL;
switch (evtid) {
case QOS_L3_OCCUP_EVENT_ID:
/* With CDP, one monitor gets used for both code/data reads */
return mpam_alloc_csu_mon(mon->class);
case QOS_L3_MBM_LOCAL_EVENT_ID:
case QOS_L3_MBM_TOTAL_EVENT_ID:
return USE_PRE_ALLOCATED;
default:
return -EOPNOTSUPP;
}
}
void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r,
enum resctrl_event_id evtid)
{
DEFINE_WAIT(wait);
int *ret;
ret = kmalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
do {
prepare_to_wait(&resctrl_mon_ctx_waiters, &wait,
TASK_INTERRUPTIBLE);
*ret = resctrl_arch_mon_ctx_alloc_no_wait(evtid);
if (*ret == -ENOSPC)
schedule();
} while (*ret == -ENOSPC && !signal_pending(current));
finish_wait(&resctrl_mon_ctx_waiters, &wait);
return ret;
}
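/*
 * Freeing a monitor wakes any task sleeping in resctrl_arch_mon_ctx_alloc()
 * above, so an allocation that failed with -ENOSPC can be retried.
 */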
static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid,
u32 mon_idx)
{
struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];
if (!mpam_is_enabled())
return;
if (!mon->class)
return;
if (evtid == QOS_L3_OCCUP_EVENT_ID)
mpam_free_csu_mon(mon->class, mon_idx);
wake_up(&resctrl_mon_ctx_waiters);
}
void resctrl_arch_mon_ctx_free(struct rdt_resource *r,
enum resctrl_event_id evtid, void *arch_mon_ctx)
{
u32 mon_idx = *(u32 *)arch_mon_ctx;
kfree(arch_mon_ctx);
resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx);
}
static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
enum mpam_device_features mon_type,
int mon_idx,
enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val)
{
struct mon_cfg cfg;
if (!mpam_is_enabled())
return -EINVAL;
/* Shift closid to account for CDP */
closid = resctrl_get_config_index(closid, cdp_type);
if (irqs_disabled()) {
/* Reading may require an IPI, which can't be sent from this context */
return -EIO;
}
cfg = (struct mon_cfg) {
.mon = mon_idx,
.match_pmg = true,
.partid = closid,
.pmg = rmid,
};
return mpam_msmon_read(mon_comp, &cfg, mon_type, val);
}
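/*
 * With CDP emulation, traffic for one closid is split across a code and a
 * data partid. A single counter value is reconstructed by reading the
 * monitor once for each partid and summing the results.
 */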
static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
enum mpam_device_features mon_type,
int mon_idx, u32 closid, u32 rmid, u64 *val)
{
if (cdp_enabled) {
u64 code_val = 0, data_val = 0;
int err;
err = __read_mon(mon, mon_comp, mon_type, mon_idx,
CDP_CODE, closid, rmid, &code_val);
if (err)
return err;
err = __read_mon(mon, mon_comp, mon_type, mon_idx,
CDP_DATA, closid, rmid, &data_val);
if (err)
return err;
*val += code_val + data_val;
return 0;
}
return __read_mon(mon, mon_comp, mon_type, mon_idx,
CDP_NONE, closid, rmid, val);
}
/* MBWU when not in ABMC mode (not supported), and CSU counters. */
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
u32 closid, u32 rmid, enum resctrl_event_id eventid,
void *arch_priv, u64 *val, void *arch_mon_ctx)
{
struct mpam_resctrl_dom *l3_dom;
struct mpam_component *mon_comp;
u32 mon_idx = *(u32 *)arch_mon_ctx;
enum mpam_device_features mon_type;
struct mpam_resctrl_mon *mon;
resctrl_arch_rmid_read_context_check();
if (!mpam_is_enabled())
return -EINVAL;
/* Bounds check eventid before using it to index mpam_resctrl_counters[] */
if (eventid > MPAM_MAX_EVENT)
return -EINVAL;
mon = &mpam_resctrl_counters[eventid];
if (!mon->class)
return -EINVAL;
l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr);
mon_comp = l3_dom->mon_comp[eventid];
if (eventid != QOS_L3_OCCUP_EVENT_ID)
return -EINVAL;
mon_type = mpam_feat_msmon_csu;
return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx,
closid, rmid, val);
}
/*
* The rmid realloc threshold should be for the smallest cache exposed to
* resctrl.
*/
static int update_rmid_limits(struct mpam_class *class)
{
u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx();
struct mpam_props *cprops = &class->props;
struct cacheinfo *ci;
lockdep_assert_cpus_held();
if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
return 0;
/*
 * Assume the cache at this level is the same size for all CPUs...
* The check just requires any online CPU and it can't go offline as we
* hold the cpu lock.
*/
ci = get_cpu_cacheinfo_level(raw_smp_processor_id(), class->level);
if (!ci || ci->size == 0) {
pr_debug("Could not read cache size for class %u\n",
class->level);
return -EINVAL;
}
if (!resctrl_rmid_realloc_limit ||
ci->size < resctrl_rmid_realloc_limit) {
resctrl_rmid_realloc_limit = ci->size;
resctrl_rmid_realloc_threshold = ci->size / num_unique_pmg;
}
return 0;
}
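/*
 * Worked example with illustrative numbers: a 32MB L3 with
 * mpam_partid_max = 63 and mpam_pmg_max = 1 has 64 * 2 = 128 unique
 * partid/pmg pairs, giving a realloc threshold of 32MB / 128 = 256KB.
 */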
static bool cache_has_usable_cpor(struct mpam_class *class)
{
struct mpam_props *cprops = &class->props;
if (!mpam_has_feature(mpam_feat_cpor_part, cprops))
return false;
/* resctrl uses u32 for all bitmap configurations */
return class->props.cpbm_wd <= 32;
}
static bool mba_class_use_mbw_max(struct mpam_props *cprops)
{
return (mpam_has_feature(mpam_feat_mbw_max, cprops) &&
cprops->bwa_wd);
}
static bool class_has_usable_mba(struct mpam_props *cprops)
{
return mba_class_use_mbw_max(cprops);
}
static bool cache_has_usable_csu(struct mpam_class *class)
{
struct mpam_props *cprops;
if (!class)
return false;
cprops = &class->props;
if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
return false;
/*
* CSU counters settle on the value, so we can get away with
* having only one.
*/
if (!cprops->num_csu_mon)
return false;
return true;
}
/*
* Calculate the worst-case percentage change from each implemented step
* in the control.
*/
static u32 get_mba_granularity(struct mpam_props *cprops)
{
if (!mba_class_use_mbw_max(cprops))
return 0;
/*
* bwa_wd is the number of bits implemented in the 0.xxx
* fixed point fraction. 1 bit is 50%, 2 is 25% etc.
*/
return DIV_ROUND_UP(MAX_MBA_BW, 1 << cprops->bwa_wd);
}
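/*
 * For example, bwa_wd = 1 gives a granularity of DIV_ROUND_UP(100, 2) = 50%,
 * and bwa_wd = 4 gives DIV_ROUND_UP(100, 16) = 7%.
 */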
/*
* Each fixed-point hardware value architecturally represents a range
* of values: the full range 0% - 100% is split contiguously into
* (1 << cprops->bwa_wd) equal bands.
*
 * Although the bwa_wd field has 6 bits, the maximum valid value is 16
 * as it reports the width of fields that are at most 16 bits. When
* fewer than 16 bits are valid the least significant bits are
* ignored. The implied binary point is kept between bits 15 and 16 and
* so the valid bits are leftmost.
*
* See ARM IHI0099B.a "MPAM system component specification", Section 9.3,
* "The fixed-point fractional format" for more information.
*
* Find the nearest percentage value to the upper bound of the selected band:
*/
static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops)
{
u32 val = mbw_max;
val >>= 16 - cprops->bwa_wd;
val += 1;
val *= MAX_MBA_BW;
val = DIV_ROUND_CLOSEST(val, 1 << cprops->bwa_wd);
return val;
}
/*
* Find the band whose upper bound is closest to the specified percentage.
*
* A round-to-nearest policy is followed here as a balanced compromise
* between unexpected under-commit of the resource (where the total of
* a set of resource allocations after conversion is less than the
* expected total, due to rounding of the individual converted
* percentages) and over-commit (where the total of the converted
* allocations is greater than expected).
*/
static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops)
{
u32 val = pc;
val <<= cprops->bwa_wd;
val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW);
val = max(val, 1) - 1;
val <<= 16 - cprops->bwa_wd;
return val;
}
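/*
 * Round-trip example with illustrative values: with bwa_wd = 2 there are
 * four bands. percent_to_mbw_max(50) computes DIV_ROUND_CLOSEST(50 << 2, 100)
 * = 2, subtracts one and shifts up to give 0x4000. mbw_max_to_percent(0x4000)
 * computes ((0x4000 >> 14) + 1) * 100 / 4 = 50, the band's upper bound.
 */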
static u32 get_mba_min(struct mpam_props *cprops)
{
if (!mba_class_use_mbw_max(cprops)) {
WARN_ON_ONCE(1);
return 0;
}
return mbw_max_to_percent(0, cprops);
}
/* Find the L3 cache that has affinity with this CPU */
static int find_l3_equivalent_bitmask(int cpu, cpumask_var_t tmp_cpumask)
{
u32 cache_id = get_cpu_cacheinfo_id(cpu, 3);
lockdep_assert_cpus_held();
return mpam_get_cpumask_from_cache_id(cache_id, 3, tmp_cpumask);
}
/*
* topology_matches_l3() - Is the provided class the same shape as L3
* @victim: The class we'd like to pretend is L3.
*
 * resctrl expects all the world's a Xeon, and all counters are on the
 * L3. We allow mapping some counters onto other classes. This requires
* that the CPU->domain mapping is the same kind of shape.
*
* Using cacheinfo directly would make this work even if resctrl can't
* use the L3 - but cacheinfo can't tell us anything about offline CPUs.
* Using the L3 resctrl domain list also depends on CPUs being online.
* Using the mpam_class we picked for L3 so we can use its domain list
* assumes that there are MPAM controls on the L3.
* Instead, this path eventually uses the mpam_get_cpumask_from_cache_id()
* helper which can tell us about offline CPUs ... but getting the cache_id
* to start with relies on at least one CPU per L3 cache being online at
* boot.
*
* Walk the victim component list and compare the affinity mask with the
* corresponding L3. The topology matches if each victim:component's affinity
* mask is the same as the CPU's corresponding L3's. These lists/masks are
* computed from firmware tables so don't change at runtime.
*/
static bool topology_matches_l3(struct mpam_class *victim)
{
int cpu, err;
struct mpam_component *victim_iter;
lockdep_assert_cpus_held();
cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL;
if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL))
return false;
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(victim_iter, &victim->components, class_list,
srcu_read_lock_held(&mpam_srcu)) {
if (cpumask_empty(&victim_iter->affinity)) {
pr_debug("class %u has CPU-less component %u - can't match L3!\n",
victim->level, victim_iter->comp_id);
return false;
}
cpu = cpumask_any_and(&victim_iter->affinity, cpu_online_mask);
if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
return false;
cpumask_clear(tmp_cpumask);
err = find_l3_equivalent_bitmask(cpu, tmp_cpumask);
if (err) {
pr_debug("Failed to find L3's equivalent component to class %u component %u\n",
victim->level, victim_iter->comp_id);
return false;
}
/* Any differing bits in the affinity mask? */
if (!cpumask_equal(tmp_cpumask, &victim_iter->affinity)) {
pr_debug("class %u component %u has Mismatched CPU mask with L3 equivalent\n"
"L3:%*pbl != victim:%*pbl\n",
victim->level, victim_iter->comp_id,
cpumask_pr_args(tmp_cpumask),
cpumask_pr_args(&victim_iter->affinity));
return false;
}
}
return true;
}
/*
 * Test if the traffic for a class matches that at egress from the L3. For
 * an MSC at a memory controller this is only possible if there is a single
 * L3, as otherwise the counters at the memory controller can include
 * bandwidth from a non-local L3.
*/
static bool traffic_matches_l3(struct mpam_class *class)
{
int err, cpu;
lockdep_assert_cpus_held();
if (class->type == MPAM_CLASS_CACHE && class->level == 3)
return true;
if (class->type == MPAM_CLASS_CACHE && class->level != 3) {
pr_debug("class %u is a different cache from L3\n", class->level);
return false;
}
if (class->type != MPAM_CLASS_MEMORY) {
pr_debug("class %u is neither of type cache or memory\n", class->level);
return false;
}
cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL;
if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) {
pr_debug("cpumask allocation failed\n");
return false;
}
cpu = cpumask_any_and(&class->affinity, cpu_online_mask);
err = find_l3_equivalent_bitmask(cpu, tmp_cpumask);
if (err) {
pr_debug("Failed to find L3 downstream to cpu %d\n", cpu);
return false;
}
if (!cpumask_equal(tmp_cpumask, cpu_possible_mask)) {
pr_debug("There is more than one L3\n");
return false;
}
/* Be strict; the traffic might stop in the intermediate cache. */
if (get_cpu_cacheinfo_id(cpu, 4) != -1) {
pr_debug("L3 isn't the last level of cache\n");
return false;
}
if (num_possible_nodes() > 1) {
pr_debug("There is more than one numa node\n");
return false;
}
#ifdef CONFIG_HMEM_REPORTING
if (node_devices[cpu_to_node(cpu)]->cache_dev) {
pr_debug("There is a memory side cache\n");
return false;
}
#endif
return true;
}
/* Test whether we can export MPAM_CLASS_CACHE:{2,3} */
static void mpam_resctrl_pick_caches(void)
{
struct mpam_class *class;
struct mpam_resctrl_res *res;
lockdep_assert_cpus_held();
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(class, &mpam_classes, classes_list,
srcu_read_lock_held(&mpam_srcu)) {
if (class->type != MPAM_CLASS_CACHE) {
pr_debug("class %u is not a cache\n", class->level);
continue;
}
if (class->level != 2 && class->level != 3) {
pr_debug("class %u is not L2 or L3\n", class->level);
continue;
}
if (!cache_has_usable_cpor(class)) {
pr_debug("class %u cache misses CPOR\n", class->level);
continue;
}
if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
pr_debug("class %u has missing CPUs, mask %*pb != %*pb\n", class->level,
cpumask_pr_args(&class->affinity),
cpumask_pr_args(cpu_possible_mask));
continue;
}
if (class->level == 2)
res = &mpam_resctrl_controls[RDT_RESOURCE_L2];
else
res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
res->class = class;
}
}
static void mpam_resctrl_pick_mba(void)
{
struct mpam_class *class, *candidate_class = NULL;
struct mpam_resctrl_res *res;
lockdep_assert_cpus_held();
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(class, &mpam_classes, classes_list,
srcu_read_lock_held(&mpam_srcu)) {
struct mpam_props *cprops = &class->props;
if (class->level != 3 && class->type == MPAM_CLASS_CACHE) {
pr_debug("class %u is a cache but not the L3\n", class->level);
continue;
}
if (!class_has_usable_mba(cprops)) {
pr_debug("class %u has no bandwidth control\n",
class->level);
continue;
}
if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
pr_debug("class %u has missing CPUs\n", class->level);
continue;
}
if (!topology_matches_l3(class)) {
pr_debug("class %u topology doesn't match L3\n",
class->level);
continue;
}
if (!traffic_matches_l3(class)) {
pr_debug("class %u traffic doesn't match L3 egress\n",
class->level);
continue;
}
/*
 * Pick a resource to back MBA that is as close as possible to the
 * L3. mbm_total counts the bandwidth leaving the L3 cache, and MBA
 * should correspond as closely as possible for proper operation of
 * mba_sc.
*/
if (!candidate_class || class->level < candidate_class->level)
candidate_class = class;
}
if (candidate_class) {
pr_debug("selected class %u to back MBA\n",
candidate_class->level);
res = &mpam_resctrl_controls[RDT_RESOURCE_MBA];
res->class = candidate_class;
}
}
static void counter_update_class(enum resctrl_event_id evt_id,
struct mpam_class *class)
{
struct mpam_class *existing_class = mpam_resctrl_counters[evt_id].class;
if (existing_class) {
if (existing_class->level == 3) {
pr_debug("Existing class is L3 - L3 wins\n");
return;
}
if (existing_class->level < class->level) {
pr_debug("Existing class is closer to L3, %u versus %u - closer is better\n",
existing_class->level, class->level);
return;
}
}
mpam_resctrl_counters[evt_id].class = class;
}
static void mpam_resctrl_pick_counters(void)
{
struct mpam_class *class;
lockdep_assert_cpus_held();
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(class, &mpam_classes, classes_list,
srcu_read_lock_held(&mpam_srcu)) {
/* The name of the resource is L3... */
if (class->type == MPAM_CLASS_CACHE && class->level != 3) {
pr_debug("class %u is a cache but not the L3", class->level);
continue;
}
if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
pr_debug("class %u does not cover all CPUs",
class->level);
continue;
}
if (cache_has_usable_csu(class)) {
pr_debug("class %u has usable CSU",
class->level);
/* CSU counters only make sense on a cache. */
switch (class->type) {
case MPAM_CLASS_CACHE:
if (update_rmid_limits(class))
break;
counter_update_class(QOS_L3_OCCUP_EVENT_ID, class);
break;
default:
break;
}
}
}
}
static int mpam_resctrl_control_init(struct mpam_resctrl_res *res)
{
struct mpam_class *class = res->class;
struct mpam_props *cprops = &class->props;
struct rdt_resource *r = &res->resctrl_res;
switch (r->rid) {
case RDT_RESOURCE_L2:
case RDT_RESOURCE_L3:
r->schema_fmt = RESCTRL_SCHEMA_BITMAP;
r->cache.arch_has_sparse_bitmasks = true;
r->cache.cbm_len = class->props.cpbm_wd;
/* mpam_devices will reject empty bitmaps */
r->cache.min_cbm_bits = 1;
if (r->rid == RDT_RESOURCE_L2) {
r->name = "L2";
r->ctrl_scope = RESCTRL_L2_CACHE;
r->cdp_capable = true;
} else {
r->name = "L3";
r->ctrl_scope = RESCTRL_L3_CACHE;
r->cdp_capable = true;
}
/*
* Which bits are shared with other ...things... Unknown
* devices use partid-0 which uses all the bitmap fields. Until
 * we have configured the SMMU and GIC not to do this, 'all the
 * bits' is the correct answer here.
*/
r->cache.shareable_bits = resctrl_get_default_ctrl(r);
r->alloc_capable = true;
break;
case RDT_RESOURCE_MBA:
r->schema_fmt = RESCTRL_SCHEMA_RANGE;
r->ctrl_scope = RESCTRL_L3_CACHE;
r->membw.delay_linear = true;
r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
r->membw.min_bw = get_mba_min(cprops);
r->membw.max_bw = MAX_MBA_BW;
r->membw.bw_gran = get_mba_granularity(cprops);
r->name = "MB";
r->alloc_capable = true;
break;
default:
return -EINVAL;
}
return 0;
}
static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp)
{
struct mpam_class *class = comp->class;
if (class->type == MPAM_CLASS_CACHE)
return comp->comp_id;
if (topology_matches_l3(class)) {
/* Use the corresponding L3 component ID as the domain ID */
int id = get_cpu_cacheinfo_id(cpu, 3);
/* Implies topology_matches_l3() made a mistake */
if (WARN_ON_ONCE(id == -1))
return comp->comp_id;
return id;
}
/* Otherwise, expose the ID used by the firmware table code. */
return comp->comp_id;
}
static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon,
enum resctrl_event_id type)
{
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
struct rdt_resource *l3 = &res->resctrl_res;
lockdep_assert_cpus_held();
/*
* There also needs to be an L3 cache present.
* The check just requires any online CPU and it can't go offline as we
* hold the cpu lock.
*/
if (get_cpu_cacheinfo_id(raw_smp_processor_id(), 3) == -1)
return 0;
/*
* If there are no MPAM resources on L3, force it into existence.
* topology_matches_l3() already ensures this looks like the L3.
* The domain-ids will be fixed up by mpam_resctrl_domain_hdr_init().
*/
if (!res->class) {
pr_warn_once("Faking L3 MSC to enable counters.\n");
res->class = mpam_resctrl_counters[type].class;
}
/*
 * Called multiple times, once per event type that has a
 * monitoring class.
 * Setting the name is necessary on monitor-only platforms.
*/
l3->name = "L3";
l3->mon_scope = RESCTRL_L3_CACHE;
/*
* num-rmid is the upper bound for the number of monitoring groups that
* can exist simultaneously, including the default monitoring group for
* each control group. Hence, advertise the whole rmid_idx space even
* though each control group has its own pmg/rmid space. Unfortunately,
* this does mean userspace needs to know the architecture to correctly
* interpret this value.
*/
l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();
if (resctrl_enable_mon_event(type, false, 0, NULL))
l3->mon_capable = true;
return 0;
}
u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type type)
{
u32 partid;
struct mpam_config *cfg;
struct mpam_props *cprops;
struct mpam_resctrl_res *res;
struct mpam_resctrl_dom *dom;
enum mpam_device_features configured_by;
lockdep_assert_cpus_held();
if (!mpam_is_enabled())
return resctrl_get_default_ctrl(r);
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
cprops = &res->class->props;
/*
* When CDP is enabled, but the resource doesn't support it,
* the control is cloned across both partids.
 * Arbitrarily pick one to read:
*/
if (mpam_resctrl_hide_cdp(r->rid))
type = CDP_DATA;
partid = resctrl_get_config_index(closid, type);
cfg = &dom->ctrl_comp->cfg[partid];
switch (r->rid) {
case RDT_RESOURCE_L2:
case RDT_RESOURCE_L3:
configured_by = mpam_feat_cpor_part;
break;
case RDT_RESOURCE_MBA:
if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
configured_by = mpam_feat_mbw_max;
break;
}
fallthrough;
default:
return resctrl_get_default_ctrl(r);
}
if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) ||
!mpam_has_feature(configured_by, cfg))
return resctrl_get_default_ctrl(r);
switch (configured_by) {
case mpam_feat_cpor_part:
return cfg->cpbm;
case mpam_feat_mbw_max:
return mbw_max_to_percent(cfg->mbw_max, cprops);
default:
return resctrl_get_default_ctrl(r);
}
}
int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type t, u32 cfg_val)
{
int err;
u32 partid;
struct mpam_config cfg;
struct mpam_props *cprops;
struct mpam_resctrl_res *res;
struct mpam_resctrl_dom *dom;
lockdep_assert_cpus_held();
lockdep_assert_irqs_enabled();
if (!mpam_is_enabled())
return -EINVAL;
/*
* No need to check the CPU as mpam_apply_config() doesn't care, and
* resctrl_arch_update_domains() relies on this.
*/
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
cprops = &res->class->props;
if (mpam_resctrl_hide_cdp(r->rid))
t = CDP_DATA;
partid = resctrl_get_config_index(closid, t);
if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) {
pr_debug("Not alloc capable or computed PARTID out of range\n");
return -EINVAL;
}
/*
* Copy the current config to avoid clearing other resources when the
* same component is exposed multiple times through resctrl.
*/
cfg = dom->ctrl_comp->cfg[partid];
switch (r->rid) {
case RDT_RESOURCE_L2:
case RDT_RESOURCE_L3:
cfg.cpbm = cfg_val;
mpam_set_feature(mpam_feat_cpor_part, &cfg);
break;
case RDT_RESOURCE_MBA:
if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops);
mpam_set_feature(mpam_feat_mbw_max, &cfg);
break;
}
fallthrough;
default:
return -EINVAL;
}
/*
* When CDP is enabled, but the resource doesn't support it, we need to
* apply the same configuration to the other partid.
*/
if (mpam_resctrl_hide_cdp(r->rid)) {
partid = resctrl_get_config_index(closid, CDP_CODE);
err = mpam_apply_config(dom->ctrl_comp, partid, &cfg);
if (err)
return err;
partid = resctrl_get_config_index(closid, CDP_DATA);
return mpam_apply_config(dom->ctrl_comp, partid, &cfg);
}
return mpam_apply_config(dom->ctrl_comp, partid, &cfg);
}
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
{
int err;
struct rdt_ctrl_domain *d;
lockdep_assert_cpus_held();
lockdep_assert_irqs_enabled();
if (!mpam_is_enabled())
return -EINVAL;
list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list) {
for (enum resctrl_conf_type t = 0; t < CDP_NUM_TYPES; t++) {
struct resctrl_staged_config *cfg = &d->staged_config[t];
if (!cfg->have_new_ctrl)
continue;
err = resctrl_arch_update_one(r, d, closid, t,
cfg->new_ctrl);
if (err)
return err;
}
}
return 0;
}
void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
{
struct mpam_resctrl_res *res;
lockdep_assert_cpus_held();
if (!mpam_is_enabled())
return;
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
mpam_reset_class_locked(res->class);
}
static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp,
enum resctrl_res_level rid,
struct rdt_domain_hdr *hdr)
{
lockdep_assert_cpus_held();
INIT_LIST_HEAD(&hdr->list);
hdr->id = mpam_resctrl_pick_domain_id(cpu, comp);
hdr->rid = rid;
cpumask_set_cpu(cpu, &hdr->cpu_mask);
}
static void mpam_resctrl_online_domain_hdr(unsigned int cpu,
struct rdt_domain_hdr *hdr)
{
lockdep_assert_cpus_held();
cpumask_set_cpu(cpu, &hdr->cpu_mask);
}
/**
* mpam_resctrl_offline_domain_hdr() - Update the domain header to remove a CPU.
* @cpu: The CPU to remove from the domain.
* @hdr: The domain's header.
*
* Removes @cpu from the header mask. If this was the last CPU in the domain,
* the domain header is removed from its parent list and true is returned,
* indicating the parent structure can be freed.
* If there are other CPUs in the domain, returns false.
*/
static bool mpam_resctrl_offline_domain_hdr(unsigned int cpu,
struct rdt_domain_hdr *hdr)
{
lockdep_assert_held(&domain_list_lock);
cpumask_clear_cpu(cpu, &hdr->cpu_mask);
if (cpumask_empty(&hdr->cpu_mask)) {
list_del_rcu(&hdr->list);
synchronize_rcu();
return true;
}
return false;
}
static void mpam_resctrl_domain_insert(struct list_head *list,
struct rdt_domain_hdr *new)
{
struct rdt_domain_hdr *err;
struct list_head *pos = NULL;
lockdep_assert_held(&domain_list_lock);
err = resctrl_find_domain(list, new->id, &pos);
if (WARN_ON_ONCE(err))
return;
list_add_tail_rcu(&new->list, pos);
}
static struct mpam_component *find_component(struct mpam_class *class, int cpu)
{
struct mpam_component *comp;
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(comp, &class->components, class_list,
srcu_read_lock_held(&mpam_srcu)) {
if (cpumask_test_cpu(cpu, &comp->affinity))
return comp;
}
return NULL;
}
static struct mpam_resctrl_dom *
mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res)
{
int err;
struct mpam_resctrl_dom *dom;
struct rdt_l3_mon_domain *mon_d;
struct rdt_ctrl_domain *ctrl_d;
struct mpam_class *class = res->class;
struct mpam_component *comp_iter, *ctrl_comp;
struct rdt_resource *r = &res->resctrl_res;
lockdep_assert_held(&domain_list_lock);
ctrl_comp = NULL;
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(comp_iter, &class->components, class_list,
srcu_read_lock_held(&mpam_srcu)) {
if (cpumask_test_cpu(cpu, &comp_iter->affinity)) {
ctrl_comp = comp_iter;
break;
}
}
/* class has no component for this CPU */
if (WARN_ON_ONCE(!ctrl_comp))
return ERR_PTR(-EINVAL);
dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu));
if (!dom)
return ERR_PTR(-ENOMEM);
if (r->alloc_capable) {
dom->ctrl_comp = ctrl_comp;
ctrl_d = &dom->resctrl_ctrl_dom;
mpam_resctrl_domain_hdr_init(cpu, ctrl_comp, r->rid, &ctrl_d->hdr);
ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN;
err = resctrl_online_ctrl_domain(r, ctrl_d);
if (err)
goto free_domain;
mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr);
} else {
pr_debug("Skipped control domain online - no controls\n");
}
if (r->mon_capable) {
struct mpam_component *any_mon_comp = NULL;
struct mpam_resctrl_mon *mon;
enum resctrl_event_id eventid;
/*
* Even if the monitor domain is backed by a different
* component, the L3 component IDs need to be used... only
* there may be no ctrl_comp for the L3.
* Search each event's class list for a component with
* overlapping CPUs and set up the dom->mon_comp array.
*/
for_each_mpam_resctrl_mon(mon, eventid) {
struct mpam_component *mon_comp;
if (!mon->class)
continue; // dummy resource
mon_comp = find_component(mon->class, cpu);
dom->mon_comp[eventid] = mon_comp;
if (mon_comp)
any_mon_comp = mon_comp;
}
if (!any_mon_comp) {
WARN_ON_ONCE(1);
err = -EFAULT;
goto offline_ctrl_domain;
}
mon_d = &dom->resctrl_mon_dom;
mpam_resctrl_domain_hdr_init(cpu, any_mon_comp, r->rid, &mon_d->hdr);
mon_d->hdr.type = RESCTRL_MON_DOMAIN;
err = resctrl_online_mon_domain(r, &mon_d->hdr);
if (err)
goto offline_ctrl_domain;
mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr);
} else {
pr_debug("Skipped monitor domain online - no monitors\n");
}
return dom;
offline_ctrl_domain:
if (r->alloc_capable) {
mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
resctrl_offline_ctrl_domain(r, ctrl_d);
}
free_domain:
kfree(dom);
dom = ERR_PTR(err);
return dom;
}
/*
* We know all the monitors are associated with the L3, even if there are no
* controls and therefore no control component. Find the cache-id for the CPU
* and use that to search for existing resctrl domains.
* This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id
* for anything that is not a cache.
*/
static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu)
{
int cache_id;
struct mpam_resctrl_dom *dom;
struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3];
lockdep_assert_cpus_held();
if (!l3->class)
return NULL;
cache_id = get_cpu_cacheinfo_id(cpu, 3);
if (cache_id < 0)
return NULL;
list_for_each_entry_rcu(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) {
if (dom->resctrl_mon_dom.hdr.id == cache_id)
return dom;
}
return NULL;
}
static struct mpam_resctrl_dom *
mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res)
{
struct mpam_resctrl_dom *dom;
struct rdt_resource *r = &res->resctrl_res;
lockdep_assert_cpus_held();
list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) {
if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity))
return dom;
}
if (r->rid != RDT_RESOURCE_L3)
return NULL;
/* Search the mon domain list too - needed on monitor only platforms. */
return mpam_resctrl_get_mon_domain_from_cpu(cpu);
}
int mpam_resctrl_online_cpu(unsigned int cpu)
{
struct mpam_resctrl_res *res;
enum resctrl_res_level rid;
guard(mutex)(&domain_list_lock);
for_each_mpam_resctrl_control(res, rid) {
struct mpam_resctrl_dom *dom;
struct rdt_resource *r = &res->resctrl_res;
if (!res->class)
continue; // dummy resource
dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
if (!dom) {
dom = mpam_resctrl_alloc_domain(cpu, res);
if (IS_ERR(dom))
return PTR_ERR(dom);
} else {
if (r->alloc_capable) {
struct rdt_ctrl_domain *ctrl_d = &dom->resctrl_ctrl_dom;
mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr);
}
if (r->mon_capable) {
struct rdt_l3_mon_domain *mon_d = &dom->resctrl_mon_dom;
mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr);
}
}
}
resctrl_online_cpu(cpu);
return 0;
}
void mpam_resctrl_offline_cpu(unsigned int cpu)
{
struct mpam_resctrl_res *res;
enum resctrl_res_level rid;
resctrl_offline_cpu(cpu);
guard(mutex)(&domain_list_lock);
for_each_mpam_resctrl_control(res, rid) {
struct mpam_resctrl_dom *dom;
struct rdt_l3_mon_domain *mon_d;
struct rdt_ctrl_domain *ctrl_d;
bool ctrl_dom_empty, mon_dom_empty;
struct rdt_resource *r = &res->resctrl_res;
if (!res->class)
continue; // dummy resource
dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
if (WARN_ON_ONCE(!dom))
continue;
if (r->alloc_capable) {
ctrl_d = &dom->resctrl_ctrl_dom;
ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
if (ctrl_dom_empty)
resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d);
} else {
ctrl_dom_empty = true;
}
if (r->mon_capable) {
mon_d = &dom->resctrl_mon_dom;
mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr);
if (mon_dom_empty)
resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr);
} else {
mon_dom_empty = true;
}
if (ctrl_dom_empty && mon_dom_empty)
kfree(dom);
}
}
int mpam_resctrl_setup(void)
{
int err = 0;
struct mpam_resctrl_res *res;
enum resctrl_res_level rid;
struct mpam_resctrl_mon *mon;
enum resctrl_event_id eventid;
wait_event(wait_cacheinfo_ready, cacheinfo_ready);
cpus_read_lock();
for_each_mpam_resctrl_control(res, rid) {
INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains);
INIT_LIST_HEAD_RCU(&res->resctrl_res.mon_domains);
res->resctrl_res.rid = rid;
}
/* Find some classes to use for controls */
mpam_resctrl_pick_caches();
mpam_resctrl_pick_mba();
/* Initialise the resctrl structures from the classes */
for_each_mpam_resctrl_control(res, rid) {
if (!res->class)
continue; // dummy resource
err = mpam_resctrl_control_init(res);
if (err) {
pr_debug("Failed to initialise rid %u\n", rid);
goto internal_error;
}
}
/* Find some classes to use for monitors */
mpam_resctrl_pick_counters();
for_each_mpam_resctrl_mon(mon, eventid) {
if (!mon->class)
continue; // dummy resource
err = mpam_resctrl_monitor_init(mon, eventid);
if (err) {
pr_debug("Failed to initialise event %u\n", eventid);
goto internal_error;
}
}
cpus_read_unlock();
if (!resctrl_arch_alloc_capable() && !resctrl_arch_mon_capable()) {
pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n",
resctrl_arch_alloc_capable(), resctrl_arch_mon_capable());
return -EOPNOTSUPP;
}
err = resctrl_init();
if (err)
return err;
WRITE_ONCE(resctrl_enabled, true);
return 0;
internal_error:
cpus_read_unlock();
pr_debug("Internal error %d - resctrl not supported\n", err);
return err;
}
void mpam_resctrl_exit(void)
{
if (!READ_ONCE(resctrl_enabled))
return;
WRITE_ONCE(resctrl_enabled, false);
resctrl_exit();
}
/*
 * The driver is detaching an MSC from this class. If resctrl was using it,
 * this should be followed by a call to resctrl_exit().
*/
void mpam_resctrl_teardown_class(struct mpam_class *class)
{
struct mpam_resctrl_res *res;
enum resctrl_res_level rid;
struct mpam_resctrl_mon *mon;
enum resctrl_event_id eventid;
might_sleep();
for_each_mpam_resctrl_control(res, rid) {
if (res->class == class) {
res->class = NULL;
break;
}
}
for_each_mpam_resctrl_mon(mon, eventid) {
if (mon->class == class) {
mon->class = NULL;
break;
}
}
}
static int __init __cacheinfo_ready(void)
{
cacheinfo_ready = true;
wake_up(&wait_cacheinfo_ready);
return 0;
}
device_initcall_sync(__cacheinfo_ready);
#ifdef CONFIG_MPAM_KUNIT_TEST
#include "test_mpam_resctrl.c"
#endif