perf/x86/intel: Add Platform QoS event support

Add an "intel_qos" PMU that exposes the Platform Quality-of-Service LLC
occupancy monitoring hardware (RMID-tagged cachelines, read through the
IA32_QM_EVTSEL/IA32_QM_CTR MSRs) via perf. RMIDs are a scarce resource,
so events that measure the same set of tasks share a single RMID, and
RMIDs are rotated between conflicting groups.

Signed-off-by: Matt Fleming <matt.fleming@intel.com>
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 7fd54f0..d29b38e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -37,6 +37,7 @@
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_rapl.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_qos.o
endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel_qos.c b/arch/x86/kernel/cpu/perf_event_intel_qos.c
new file mode 100644
index 0000000..14c0375
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_qos.c
@@ -0,0 +1,731 @@
+/*
+ * Platform Quality-of-Service (QoS) Monitoring.
+ *
+ * Based very, very heavily on work by Peter Zijlstra.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include "perf_event.h"
+
+#define MSR_IA32_PQR_ASSOC 0x0c8f
+#define MSR_IA32_QM_CTR 0x0c8e
+#define MSR_IA32_QM_EVTSEL 0x0c8d
+
+static unsigned int qos_max_rmid = -1;
+static unsigned int qos_l3_scale; /* supposedly cacheline size */
+
+/*
+ * Per-cpu RMID scheduling state: which RMID is currently programmed
+ * into MSR_IA32_PQR_ASSOC on this cpu and how many started events are
+ * sharing it.
+ */
+struct intel_qos_state {
+	raw_spinlock_t lock;	/* protects rmid and cnt below */
+	int rmid;		/* RMID in PQR_ASSOC; 0 when no event runs */
+	int cnt;		/* number of started events using rmid */
+};
+
+static DEFINE_PER_CPU(struct intel_qos_state, qos_state);
+
+/*
+ * Protects the global state, hold both for modification, hold either for
+ * stability.
+ *
+ * XXX we modify RMID with only cache_mutex held, racy!
+ */
+static DEFINE_MUTEX(cache_mutex);
+static DEFINE_RAW_SPINLOCK(cache_lock);
+
+static unsigned long *qos_rmid_bitmap;
+
+/*
+ * All events
+ */
+static LIST_HEAD(cache_events);
+
+/*
+ * Groups of events that have the same target(s), one RMID per group.
+ */
+static LIST_HEAD(cache_groups);
+
+/*
+ * The new RMID we must not use until intel_qos_stable().
+ * See intel_qos_rotate().
+ */
+static unsigned long *cache_limbo_bitmap;
+
+/*
+ * The spare RMID that make rotation possible; keep out of the
+ * qos_rmid_bitmap to avoid it getting used for new events.
+ */
+static int cache_rotation_rmid;
+
+/*
+ * The freed RMIDs, see intel_qos_rotate().
+ */
+static int cache_freed_nr;
+static int *cache_freed_rmid;
+
+/*
+ * One online cpu per package, for intel_qos_stable().
+ */
+static cpumask_t cache_cpus;
+
+/*
+ * Allocate one RMID from the global allocation bitmap.
+ *
+ * Returns < 0 on fail.
+ *
+ * NOTE(review): qos_max_rmid is the maximum RMID *value*, i.e. there
+ * are qos_max_rmid + 1 RMIDs; searching only qos_max_rmid bits means
+ * the highest RMID is never handed out -- confirm intended.
+ */
+static int __get_rmid(void)
+{
+	return bitmap_find_free_region(qos_rmid_bitmap, qos_max_rmid, 0);
+}
+
+/*
+ * Return @rmid to the allocation bitmap so __get_rmid() can hand it
+ * out again.  Callers must first reach a quiescent state for @rmid;
+ * see __free_rmid()/intel_qos_stabilize().
+ */
+static void __put_rmid(int rmid)
+{
+	bitmap_release_region(qos_rmid_bitmap, rmid, 0);
+}
+
+/*
+ * Defer releasing @rmid; it needs a quiescent state before __put, see
+ * intel_qos_stabilize().
+ *
+ * Called from intel_qos_rotate() under cache_mutex; no bounds check on
+ * cache_freed_nr (the array is sized for every possible RMID).
+ */
+static void __free_rmid(int rmid)
+{
+	cache_freed_rmid[cache_freed_nr++] = rmid;
+}
+
+#define RMID_VAL_ERROR (1ULL << 63)
+#define RMID_VAL_UNAVAIL (1ULL << 62)
+
+#define QOS_L3_OCCUP_EVENT_ID (1 << 0)
+
+#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID
+
+/*
+ * Read the current occupancy count (in cachelines) for @rmid by
+ * selecting the event/RMID pair in IA32_QM_EVTSEL and reading
+ * IA32_QM_CTR.  The caller must check the result against
+ * RMID_VAL_ERROR / RMID_VAL_UNAVAIL before trusting it.
+ */
+static u64 __rmid_read(unsigned long rmid)
+{
+	u64 val;
+
+	/*
+	 * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
+	 * it just says that to increase confusion.
+	 */
+	wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
+	rdmsrl(MSR_IA32_QM_CTR, val);
+
+	/*
+	 * Aside from the ERROR and UNAVAIL bits, assume this thing returns
+	 * the number of cachelines tagged with @rmid.
+	 */
+	return val;
+}
+
+/*
+ * Check whether the corresponding value for the RMID is non-zero, which
+ * indicates that this RMID is in use.
+ *
+ * IPI callback; @info points to a bool that is only ever set to true
+ * (never cleared), so concurrent writes from several packages still
+ * yield a well-defined result.
+ */
+static void smp_test_stable(void *info)
+{
+	bool *inuse = info;
+	int i;
+
+	for (i = 0; i < cache_freed_nr; i++) {
+		if (__rmid_read(cache_freed_rmid[i]))
+			*inuse = true;
+	}
+}
+
+/*
+ * Test if the freed RMIDs still have cachelines tagged with them,
+ * checking one cpu per package; see the comment near
+ * intel_qos_rotate().
+ *
+ * Returns true only when every freed RMID reads zero everywhere.
+ */
+static bool intel_qos_is_stable(void)
+{
+	bool inuse = false;
+
+	/*
+	 * smp_call_function_many() skips the calling cpu, which left the
+	 * local package untested when this cpu is in cache_cpus;
+	 * on_each_cpu_mask() includes the local cpu when it is part of
+	 * the mask.
+	 */
+	on_each_cpu_mask(&cache_cpus, smp_test_stable, &inuse, true);
+
+	return !inuse;
+}
+
+/*
+ * Quiescent state; wait for all the 'freed' RMIDs to become unused.
+ * After this we can reuse them and know that the current set of
+ * active RMIDs is stable.
+ *
+ * Called from intel_qos_rotate() with cache_mutex held.
+ */
+static void intel_qos_stabilize(void)
+{
+	int i = 0;
+
+	if (!cache_freed_nr)
+		return;
+
+	/*
+	 * Now wait until the old RMID drops back to 0 again, this means all
+	 * cachelines have acquired a new tag and the new RMID is now stable.
+	 */
+	while (!intel_qos_is_stable()) {
+		/*
+		 * XXX adaptive timeout? Ideally the hardware would get us an
+		 * interrupt :/
+		 */
+		schedule_timeout_uninterruptible(1);
+	}
+
+	/* The freed RMIDs are quiescent; readers may trust them again. */
+	bitmap_clear(cache_limbo_bitmap, 0, qos_max_rmid);
+
+	/* Refill the spare rotation RMID first if rotation consumed it. */
+	if (cache_rotation_rmid <= 0) {
+		cache_rotation_rmid = cache_freed_rmid[0];
+		i++;
+	}
+
+	/* Everything else goes back to the general allocation pool. */
+	for (; i < cache_freed_nr; i++)
+		__put_rmid(cache_freed_rmid[i]);
+
+	cache_freed_nr = 0;
+}
+
+/*
+ * Install @rmid on every event of @group (the leader plus all the
+ * events hanging off its qos_group_entry list) and return the RMID the
+ * group previously carried.
+ */
+static unsigned long
+cache_group_xchg_rmid(struct perf_event *group, unsigned long rmid)
+{
+	struct hw_perf_event *leader_hw = &group->hw;
+	unsigned long prev_rmid = leader_hw->qos_rmid;
+	struct perf_event *member;
+
+	leader_hw->qos_rmid = rmid;
+	list_for_each_entry(member, &leader_hw->qos_group_entry, hw.qos_group_entry)
+		member->hw.qos_rmid = rmid;
+
+	return prev_rmid;
+}
+
+/*
+ * Determine if @a and @b measure the same set of tasks.
+ *
+ * Two matching events may share a single RMID (see
+ * intel_qos_setup_event()).
+ */
+static bool __match_event(struct perf_event *a, struct perf_event *b)
+{
+	/* Per-task and system-wide events never measure the same set. */
+	if ((a->attach_state & PERF_ATTACH_TASK) !=
+	    (b->attach_state & PERF_ATTACH_TASK))
+		return false;
+
+	/* Per-task events match iff they target the same task. */
+	if (a->attach_state & PERF_ATTACH_TASK)
+		return a->hw.qos_target == b->hw.qos_target;
+
+	/* not task */
+
+#ifdef CONFIG_CGROUP_PERF
+	/*
+	 * Events attached to different cgroups measure different task
+	 * sets.  The previous code fell through to 'return true' here,
+	 * wrongly merging distinct cgroup events into one RMID group.
+	 */
+	if (a->cgrp != b->cgrp)
+		return false;
+#endif
+
+	return true; /* if not task or cgroup, we're machine wide */
+}
+
+#ifdef CONFIG_CGROUP_PERF
+/*
+ * Resolve the cgroup an event effectively monitors: its explicit cgroup
+ * if it has one, otherwise the cgroup of its target task; NULL for a
+ * system-wide event.
+ */
+static struct perf_cgroup *event_to_cgroup(struct perf_event *event)
+{
+	if (event->cgrp)
+		return event->cgrp;
+
+	if (event->attach_state & PERF_ATTACH_TASK) /* XXX */
+		return perf_cgroup_from_task(event->hw.qos_target);
+
+	return NULL;
+}
+#endif
+
+/*
+ * Determine if @a's tasks intersect with @b's tasks.
+ *
+ * Returns true when the two events may not hold distinct RMIDs at the
+ * same time (their task sets overlap), false when they are provably
+ * disjoint.  Conservative: any doubt counts as a conflict.
+ */
+static bool __conflict_event(struct perf_event *a, struct perf_event *b)
+{
+#ifdef CONFIG_CGROUP_PERF
+	struct perf_cgroup *ac, *bc;
+
+	ac = event_to_cgroup(a);
+	bc = event_to_cgroup(b);
+
+	if (!ac || !bc) {
+		/*
+		 * If either is NULL, its a system wide event and that
+		 * always conflicts with a cgroup one.
+		 *
+		 * If both are system wide, __match_event() should've
+		 * been true and we'll never get here, if we did fail.
+		 */
+		return true;
+	}
+
+	/*
+	 * If one is a parent of the other, we've got an intersection.
+	 */
+	if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+	    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+		return true;
+#endif
+
+	/*
+	 * If one of them is not a task, same story as above with cgroups.
+	 */
+	if (!(a->attach_state & PERF_ATTACH_TASK) ||
+	    !(b->attach_state & PERF_ATTACH_TASK))
+		return true;
+
+	/*
+	 * Again, if they're the same __match_event() should've caught us, if not fail.
+	 */
+	if (a->hw.qos_target == b->hw.qos_target)
+		return true;
+
+	/*
+	 * Must be non-overlapping.
+	 */
+	return false;
+}
+
+/*
+ * Attempt to rotate the groups and assign new RMIDs, ought to run from
+ * a delayed work or somesuch.
+ *
+ * Rotating RMIDs is complicated; firstly because the hardware doesn't
+ * give us any clues; secondly because of cgroups.
+ *
+ * There's problems with the hardware interface; when you change the
+ * task:RMID map cachelines retain their 'old' tags, giving a skewed
+ * picture. In order to work around this, we must always keep one free
+ * RMID.
+ *
+ * Rotation works by taking away an RMID from a group (the old RMID),
+ * and assigning the free RMID to another group (the new RMID). We must
+ * then wait for the old RMID to not be used (no cachelines tagged).
+ * This ensures that all cachelines are tagged with 'active' RMIDs. At
+ * this point we can start reading values for the new RMID and treat the
+ * old RMID as the free RMID for the next rotation.
+ *
+ * Secondly, since cgroups can nest, we must make sure to not program
+ * conflicting cgroups at the same time. A conflicting cgroup is one
+ * that has a parent<->child relation. After all, a task of the child
+ * cgroup will also be covered by the parent cgroup.
+ *
+ * Therefore, when selecting a new group, we must invalidate all
+ * conflicting groups. Rotations allows us to measure all (conflicting)
+ * groups sequentially.
+ *
+ * XXX there's a further problem in that because we do our own rotation
+ * and cheat with schedulability the event {enabled,running} times are
+ * incorrect.
+ */
+static bool intel_qos_rotate(void)
+{
+	struct perf_event *rotor, *group;
+	int rmid;
+
+	mutex_lock(&cache_mutex);
+
+	if (list_empty(&cache_groups))
+		goto unlock_mutex;
+
+	rotor = list_first_entry(&cache_groups, struct perf_event, hw.qos_groups_entry);
+
+	/* Take the rotor's RMID away; it becomes a 'freed' RMID. */
+	raw_spin_lock_irq(&cache_lock);
+	list_del(&rotor->hw.qos_groups_entry);
+	rmid = cache_group_xchg_rmid(rotor, -1);
+	WARN_ON_ONCE(rmid <= 0); /* first entry must always have an RMID */
+	__free_rmid(rmid);
+	raw_spin_unlock_irq(&cache_lock);
+
+	/*
+	 * XXX O(n^2) schedulability
+	 */
+
+	list_for_each_entry(group, &cache_groups, hw.qos_groups_entry) {
+		bool conflicts = false;
+		struct perf_event *iter;
+
+		/* Only groups ahead of us in the list can invalidate us. */
+		list_for_each_entry(iter, &cache_groups, hw.qos_groups_entry) {
+			if (iter == group)
+				break;
+			if (__conflict_event(group, iter)) {
+				conflicts = true;
+				break;
+			}
+		}
+
+		/* Conflicting group holding an RMID: take it away. */
+		if (conflicts && group->hw.qos_rmid > 0) {
+			rmid = cache_group_xchg_rmid(group, -1);
+			WARN_ON_ONCE(rmid <= 0);
+			__free_rmid(rmid);
+			continue;
+		}
+
+		/* Schedulable group without an RMID: try to assign one. */
+		if (!conflicts && group->hw.qos_rmid <= 0) {
+			rmid = __get_rmid();
+			if (rmid <= 0) {
+				rmid = cache_rotation_rmid;
+				cache_rotation_rmid = -1;
+			}
+			if (rmid <= 0)
+				break; /* we're out of RMIDs, more next time */
+
+			/*
+			 * The fresh RMID may still have stale cachelines
+			 * tagged; park it in the limbo bitmap so readers
+			 * report 'unavailable' until intel_qos_stabilize()
+			 * clears it.  This must only happen for a valid
+			 * RMID: the previous code called set_bit() before
+			 * the <= 0 check above, so running out of RMIDs
+			 * did set_bit(-1, ...) -- an out-of-bounds write.
+			 */
+			set_bit(rmid, cache_limbo_bitmap);
+
+			rmid = cache_group_xchg_rmid(group, rmid);
+			WARN_ON_ONCE(rmid > 0);
+			continue;
+		}
+
+		/*
+		 * either we conflict and do not have an RMID -> good,
+		 * or we do not conflict and have an RMID -> also good.
+		 */
+	}
+
+	/* Requeue the rotor at the tail; it is next in line for an RMID. */
+	raw_spin_lock_irq(&cache_lock);
+	list_add_tail(&rotor->hw.qos_groups_entry, &cache_groups);
+	raw_spin_unlock_irq(&cache_lock);
+
+	/*
+	 * XXX force a PMU reprogram here such that the new RMIDs are in
+	 * effect.
+	 */
+
+	intel_qos_stabilize();
+
+unlock_mutex:
+	mutex_unlock(&cache_mutex);
+
+	/*
+	 * XXX reschedule work.
+	 */
+	return false;
+}
+
+/*
+ * Find a group and setup RMID.
+ *
+ * If an existing group measures the same tasks as @event, return that
+ * group (copying its RMID).  Otherwise allocate a fresh RMID for
+ * @event -- unless a conflicting group exists, in which case @event
+ * stays without an RMID until rotation schedules it.
+ *
+ * Called from intel_qos_event_init() with cache_mutex held.
+ */
+static struct perf_event *intel_qos_setup_event(struct perf_event *event)
+{
+	struct perf_event *iter;
+	int rmid = 0; /* unset */
+
+	list_for_each_entry(iter, &cache_groups, hw.qos_groups_entry) {
+		if (__match_event(iter, event)) {
+			event->hw.qos_rmid = iter->hw.qos_rmid;
+			return iter;
+		}
+		if (__conflict_event(iter, event))
+			rmid = -1; /* conflicting rmid */
+	}
+
+	if (!rmid) {
+		/* XXX lacks stabilization */
+		event->hw.qos_rmid = __get_rmid();
+	}
+
+	return NULL;
+}
+
+/*
+ * Update event->count with the current occupancy (in bytes) of the
+ * event's RMID.  Limbo/error readings are ignored and the previous
+ * count is retained.
+ */
+static void intel_qos_event_read(struct perf_event *event)
+{
+	int rmid = event->hw.qos_rmid;
+	u64 val = RMID_VAL_UNAVAIL;
+
+	/*
+	 * Rotation may have taken the RMID away (qos_rmid <= 0).  The
+	 * previous code widened that to a huge unsigned long and handed
+	 * it to test_bit()/__rmid_read(), an out-of-bounds access.
+	 */
+	if (rmid <= 0)
+		return;
+
+	if (!test_bit(rmid, cache_limbo_bitmap))
+		val = __rmid_read(rmid);
+
+	/*
+	 * Ignore this reading on error states and do not update the value.
+	 */
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return;
+
+	val *= qos_l3_scale; /* cachelines -> bytes */
+
+	local64_set(&event->count, val);
+}
+
+/*
+ * pmu::start -- program the event's RMID into this cpu's PQR_ASSOC MSR.
+ * Co-scheduled events on one cpu must all carry the same RMID;
+ * state->cnt counts them so pmu::stop knows when to clear PQR_ASSOC.
+ */
+static void intel_qos_event_start(struct perf_event *event, int mode)
+{
+	struct intel_qos_state *state = &__get_cpu_var(qos_state);
+	unsigned long rmid = event->hw.qos_rmid;
+	unsigned long flags;
+
+	if (!(event->hw.qos_state & PERF_HES_STOPPED))
+		return;
+
+	event->hw.qos_state &= ~PERF_HES_STOPPED;
+
+	raw_spin_lock_irqsave(&state->lock, flags);
+	if (state->cnt++)
+		WARN_ON_ONCE(state->rmid != rmid); /* co-scheduled events must agree */
+	else
+		WARN_ON_ONCE(state->rmid); /* idle cpu must have rmid 0 */
+	state->rmid = rmid;
+	wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+}
+
+/*
+ * pmu::stop -- snapshot a final count for the event and drop this cpu's
+ * reference on the shared RMID, clearing PQR_ASSOC when the last
+ * running event stops.
+ */
+static void intel_qos_event_stop(struct perf_event *event, int mode)
+{
+	struct intel_qos_state *state = &__get_cpu_var(qos_state);
+	unsigned long flags;
+
+	if (event->hw.qos_state & PERF_HES_STOPPED)
+		return;
+
+	event->hw.qos_state |= PERF_HES_STOPPED;
+
+	raw_spin_lock_irqsave(&state->lock, flags);
+	intel_qos_event_read(event); /* fold in the count before unscheduling */
+
+	if (!--state->cnt) {
+		state->rmid = 0;
+		wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+	} else {
+		WARN_ON_ONCE(!state->rmid);
+	}
+
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+}
+
+/*
+ * pmu::add -- mark the event stopped and, when PERF_EF_START is set and
+ * the event currently holds a valid RMID, start it.  Events without an
+ * RMID (rotation took it away, or a conflicting group exists) stay
+ * stopped and simply count nothing; cache_lock keeps the RMID stable
+ * against concurrent rotation.
+ */
+static int intel_qos_event_add(struct perf_event *event, int mode)
+{
+	unsigned long flags;
+	int rmid;
+
+	raw_spin_lock_irqsave(&cache_lock, flags);
+
+	event->hw.qos_state = PERF_HES_STOPPED;
+	rmid = event->hw.qos_rmid;
+	if (rmid <= 0)
+		goto unlock;
+
+	if (mode & PERF_EF_START)
+		intel_qos_event_start(event, mode);
+
+unlock:
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	return 0;
+}
+
+/*
+ * pmu::del -- counterpart of intel_qos_event_add(); stop under
+ * cache_lock so we do not race with RMID rotation.
+ */
+static void intel_qos_event_del(struct perf_event *event, int mode)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&cache_lock, flags);
+	intel_qos_event_stop(event, mode);
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+}
+
+/*
+ * Unlink a dying event; if it led a group, promote another member to
+ * group leader (keeping the RMID), otherwise dissolve the group and
+ * return its RMID to the pool.
+ *
+ * NOTE(review): the RMID is handed straight to __put_rmid() without a
+ * quiescence wait, unlike the __free_rmid() path -- stale cachelines
+ * may skew the next user of this RMID; confirm intended.
+ */
+static void intel_qos_event_destroy(struct perf_event *event)
+{
+	struct perf_event *group_other = NULL;
+
+	mutex_lock(&cache_mutex);
+	raw_spin_lock_irq(&cache_lock);
+
+	list_del(&event->hw.qos_events_entry);
+
+	/*
+	 * If there's another event in this group...
+	 */
+	if (!list_empty(&event->hw.qos_group_entry)) {
+		group_other = list_first_entry(&event->hw.qos_group_entry,
+					       struct perf_event,
+					       hw.qos_group_entry);
+		list_del(&event->hw.qos_group_entry);
+	}
+	/*
+	 * And we're the group leader..
+	 */
+	if (!list_empty(&event->hw.qos_groups_entry)) {
+		/*
+		 * If there was a group_other, make that leader, otherwise
+		 * destroy the group and return the RMID.
+		 */
+		if (group_other) {
+			list_replace(&event->hw.qos_groups_entry,
+				     &group_other->hw.qos_groups_entry);
+		} else {
+			int rmid = event->hw.qos_rmid;
+			if (rmid > 0)
+				__put_rmid(rmid);
+			list_del(&event->hw.qos_groups_entry);
+		}
+	}
+
+	raw_spin_unlock_irq(&cache_lock);
+	mutex_unlock(&cache_mutex);
+}
+
+static struct pmu intel_qos_pmu;
+
+/*
+ * Takes non-sampling task,cgroup or machine wide events.
+ *
+ * XXX there's a bit of a problem in that we cannot simply do the one
+ * event per node as one would want, since that one event would one get
+ * scheduled on the one cpu. But we want to 'schedule' the RMID on all
+ * CPUs.
+ *
+ * This means we want events for each CPU, however, that generates a lot
+ * of duplicate values out to userspace -- this is not to be helped
+ * unless we want to change the core code in some way.
+ */
+static int intel_qos_event_init(struct perf_event *event)
+{
+	struct perf_event *group;
+
+	if (event->attr.type != intel_qos_pmu.type)
+		return -ENOENT;
+
+	/* Only the occupancy event id is understood. */
+	if (event->attr.config & ~QOS_EVENT_MASK)
+		return -EINVAL;
+
+	if (event->cpu == -1) /* must have per-cpu events; see above */
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv ||
+	    event->attr.exclude_idle ||
+	    event->attr.exclude_host ||
+	    event->attr.exclude_guest ||
+	    event->attr.sample_period) /* no sampling */
+		return -EINVAL;
+
+	event->destroy = intel_qos_event_destroy;
+
+	mutex_lock(&cache_mutex);
+
+	INIT_LIST_HEAD(&event->hw.qos_group_entry);
+	group = intel_qos_setup_event(event); /* will also set rmid */
+
+	raw_spin_lock_irq(&cache_lock);
+	if (group) {
+		/* Join the existing group and share its RMID. */
+		event->hw.qos_rmid = group->hw.qos_rmid;
+		list_add_tail(&event->hw.qos_group_entry,
+			      &group->hw.qos_group_entry);
+	} else {
+		/* No matching group; this event leads a new one. */
+		list_add_tail(&event->hw.qos_groups_entry,
+			      &cache_groups);
+	}
+
+	list_add_tail(&event->hw.qos_events_entry, &cache_events);
+	raw_spin_unlock_irq(&cache_lock);
+
+	mutex_unlock(&cache_mutex);
+
+	return 0;
+}
+
+/* sysfs event definition: events/cache_occupancy maps to event=0x01 */
+EVENT_ATTR_STR(cache_occupancy, intel_qos_cache, "event=0x01");
+
+static struct attribute *intel_qos_events_attr[] = {
+	EVENT_PTR(intel_qos_cache),
+	NULL,
+};
+
+static struct attribute_group intel_qos_events_group = {
+	.name = "events",
+	.attrs = intel_qos_events_attr,
+};
+
+/* sysfs format definition: 'event' occupies attr.config bits 0-7 */
+PMU_FORMAT_ATTR(event, "config:0-7");
+static struct attribute *intel_qos_formats_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group intel_qos_format_group = {
+	.name = "format",
+	.attrs = intel_qos_formats_attr,
+};
+
+const struct attribute_group *intel_qos_attr_groups[] = {
+	&intel_qos_events_group,
+	&intel_qos_format_group,
+	NULL,
+};
+
+/*
+ * The intel_qos PMU.  Counting only -- event_init rejects
+ * sample_period -- with values pulled on demand via pmu::read.
+ */
+static struct pmu intel_qos_pmu = {
+	.attr_groups = intel_qos_attr_groups,
+	.task_ctx_nr = perf_sw_context,
+	.event_init = intel_qos_event_init,
+	.add = intel_qos_event_add,
+	.del = intel_qos_event_del,
+	.start = intel_qos_event_start,
+	.stop = intel_qos_event_stop,
+	.read = intel_qos_event_read,
+};
+
+/*
+ * Detect the LLC occupancy feature, size the RMID bookkeeping for the
+ * minimum RMID count across all cpus, and register the PMU.
+ *
+ * Fixes over the previous version: RMIDs run 0..qos_max_rmid
+ * *inclusive*, so bitmaps/arrays are sized for qos_max_rmid + 1
+ * entries; the limbo bitmap is zero-initialized (it was read by
+ * intel_qos_event_read() before ever being cleared); and every error
+ * path now releases what was allocated before it.
+ */
+static int __init intel_qos_init(void)
+{
+	unsigned int nr_rmids;
+	int i, cpu, ret;
+
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_CQM_OCCUP_LLC)) {
+		pr_info("Intel QoS not supported\n");
+		return -ENODEV;
+	}
+
+	qos_l3_scale = boot_cpu_data.x86_cache_occ_scale;
+
+	/*
+	 * It's possible that not all resources support the same number
+	 * of RMIDs. Instead of making scheduling much more complicated
+	 * (where we have to match a task's RMID to a cpu that supports
+	 * that many RMIDs) just find the minimum RMIDs supported across
+	 * all cpus.
+	 *
+	 * Also, check that the scales match on all cpus.
+	 */
+	for_each_online_cpu(cpu) {
+		struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+		if (c->x86_cache_max_rmid < qos_max_rmid)
+			qos_max_rmid = c->x86_cache_max_rmid;
+
+		if (c->x86_cache_occ_scale != qos_l3_scale) {
+			pr_err("Multiple LLC scale values, disabling\n");
+			return -EINVAL;
+		}
+	}
+
+	/* x86_cache_max_rmid is a maximum value, not a count. */
+	nr_rmids = qos_max_rmid + 1;
+
+	qos_rmid_bitmap = kmalloc(sizeof(long) * BITS_TO_LONGS(nr_rmids), GFP_KERNEL);
+	if (!qos_rmid_bitmap)
+		return -ENOMEM;
+
+	/* Zeroed: intel_qos_event_read() tests bits before any rotation. */
+	cache_limbo_bitmap = kzalloc(sizeof(long) * BITS_TO_LONGS(nr_rmids), GFP_KERNEL);
+	if (!cache_limbo_bitmap) {
+		ret = -ENOMEM;
+		goto err_free_rmid_bitmap;
+	}
+
+	cache_freed_rmid = kmalloc(sizeof(int) * nr_rmids, GFP_KERNEL);
+	if (!cache_freed_rmid) {
+		ret = -ENOMEM;
+		goto err_free_limbo_bitmap;
+	}
+
+	bitmap_zero(qos_rmid_bitmap, nr_rmids);
+	bitmap_set(qos_rmid_bitmap, 0, 1); /* RMID 0 is special */
+	cache_rotation_rmid = __get_rmid(); /* keep one free RMID for rotation */
+	if (WARN_ON_ONCE(cache_rotation_rmid < 0)) {
+		ret = cache_rotation_rmid;
+		goto err_free_freed_rmid;
+	}
+
+	/*
+	 * XXX hotplug notifiers!
+	 *
+	 * NOTE(review): cache_cpus is never populated, so the stability
+	 * IPIs in intel_qos_is_stable() currently target no cpus; a
+	 * hotplug callback tracking one cpu per package is still needed.
+	 */
+	for_each_possible_cpu(i) {
+		struct intel_qos_state *state = &per_cpu(qos_state, i);
+
+		raw_spin_lock_init(&state->lock);
+		state->rmid = 0;
+	}
+
+	ret = perf_pmu_register(&intel_qos_pmu, "intel_qos", -1);
+	if (ret) {
+		pr_err("Intel QoS perf registration failed: %d\n", ret);
+		goto err_free_freed_rmid;
+	}
+
+	pr_info("Intel QoS monitoring enabled\n");
+	return 0;
+
+err_free_freed_rmid:
+	kfree(cache_freed_rmid);
+err_free_limbo_bitmap:
+	kfree(cache_limbo_bitmap);
+err_free_rmid_bitmap:
+	kfree(qos_rmid_bitmap);
+	return ret;
+}
+device_initcall(intel_qos_init);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e56b07f..a48e01e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -126,6 +126,14 @@
/* for tp_event->class */
struct list_head tp_list;
};
+ struct { /* intel_qos */
+ struct task_struct *qos_target;
+ int qos_state;
+ int qos_rmid;
+ struct list_head qos_events_entry;
+ struct list_head qos_groups_entry;
+ struct list_head qos_group_entry;
+ };
#ifdef CONFIG_HAVE_HW_BREAKPOINT
struct { /* breakpoint */
/*
@@ -526,6 +534,33 @@
int page;
};
+#ifdef CONFIG_CGROUP_PERF
+
+#include <linux/cgroup.h>
+
+struct perf_cgroup_info;
+
+/*
+ * Per-cgroup perf state; moved here from kernel/events/core.c (see the
+ * removal hunk there) so PMU drivers can map an event to its cgroup.
+ */
+struct perf_cgroup {
+	struct cgroup_subsys_state css;
+	struct perf_cgroup_info __percpu *info;
+};
+
+/*
+ * Must ensure cgroup is pinned (css_get) before calling
+ * this function. In other words, we cannot call this function
+ * if there is no cgroup event for the current CPU context.
+ *
+ * XXX: its not safe to use this thing!!!
+ */
+static inline struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task)
+{
+	return container_of(task_css(task, perf_subsys_id),
+			    struct perf_cgroup, css);
+}
+
+#endif /* CONFIG_CGROUP_PERF */
+
#ifdef CONFIG_PERF_EVENTS
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fa0b2d4..f129a04 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -329,23 +329,6 @@
u64 timestamp;
};
-struct perf_cgroup {
- struct cgroup_subsys_state css;
- struct perf_cgroup_info __percpu *info;
-};
-
-/*
- * Must ensure cgroup is pinned (css_get) before calling
- * this function. In other words, we cannot call this function
- * if there is no cgroup event for the current CPU context.
- */
-static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task)
-{
- return container_of(task_css(task, perf_subsys_id),
- struct perf_cgroup, css);
-}
-
static inline bool
perf_cgroup_match(struct perf_event *event)
{