arch/metag/kernel/perf/perf_event.c - pub/scm/linux/kernel/git/lizf/linux-3.4.y - Git at Google

 /*
  * Meta performance counter support.
  *  Copyright (C) 2012 Imagination Technologies Ltd
  *
  * This code is based on the sh pmu code:
  *  Copyright (C) 2009 Paul Mundt
  *
  * and on the arm pmu code:
  *  Copyright (C) 2009 picoChip Designs, Ltd., James Iles
  *  Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */

 #include <linux/atomic.h>
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/irqchip/metag.h>
 #include <linux/perf_event.h>
 #include <linux/slab.h>

 #include <asm/core_reg.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/processor.h>

 #include "perf_event.h"

 /*
  * Forward declarations: metag_pmu_event_init() calls/installs both of these
  * before their definitions further down the file.
  */
 static int _hw_perf_event_init(struct perf_event *);
 static void _hw_perf_event_destroy(struct perf_event *);

 /*
  * Determines which core type we are. Stays NULL until
  * init_hw_perf_events() recognises the core and points it at _metag_pmu.
  */
 static struct metag_pmu *metag_pmu __read_mostly;

 /*
  * Processor specific data: the events occupying each counter, the bitmap of
  * counters in use and the lock taken around counter register updates.
  */
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

 /* PMU admin */
 /*
  * Report the name of the detected PMU, or NULL when no supported PMU has
  * been set up.
  */
 const char *perf_pmu_name(void)
 {
 	return metag_pmu ? metag_pmu->name : NULL;
 }
 EXPORT_SYMBOL_GPL(perf_pmu_name);

 /*
  * Report how many counters the detected PMU exposes (its max_events), or 0
  * when no supported PMU has been set up.
  */
 int perf_num_counters(void)
 {
 	if (!metag_pmu)
 		return 0;

 	return metag_pmu->max_events;
 }
 EXPORT_SYMBOL_GPL(perf_num_counters);

 static inline int metag_pmu_initialised(void)
 {
 	return !!metag_pmu;
 }

 /*
  * Release the overflow interrupts taken by reserve_pmu_hardware().
  *
  * Internal IRQ 17 (cookie 1) is released first, then internal IRQ 16
  * (cookie 0). An IRQ that cannot be mapped is silently skipped.
  */
 static void release_pmu_hardware(void)
 {
 	unsigned int minor_rev = metag_pmu->version &
 			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS);
 	int counter;

 	minor_rev >>= METAC_ID_REV_S;

 	/* Only cores from 0x0104 onwards have overflow interrupts to free */
 	if (minor_rev >= 0x0104) {
 		for (counter = 1; counter >= 0; counter--) {
 			int irq = internal_irq_map(16 + counter);

 			if (irq >= 0)
 				free_irq(irq, (void *)(long)counter);
 		}
 	}
 }

 /*
  * Claim the overflow interrupts for performance counters 0 and 1.
  *
  * Returns 0 on success, or when the core is too old to have overflow
  * interrupts (nothing to reserve). Otherwise returns the negative value
  * reported by internal_irq_map() or request_irq(); in that case no
  * interrupt is left requested.
  */
 static int reserve_pmu_hardware(void)
 {
 	int err = 0, irq[2];
 	unsigned int version = (metag_pmu->version &
 			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
 			METAC_ID_REV_S;

 	/* Early cores don't have overflow interrupts */
 	if (version < 0x0104)
 		goto out;

 	/*
 	 * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
 	 * similarly, 17 is the interrupt for performance counter 1.
 	 * We can't (yet) interrupt on the cycle counter, because it's a
 	 * register, however it holds a 32-bit value as opposed to 24-bit.
 	 */
 	irq[0] = internal_irq_map(16);
 	if (irq[0] < 0) {
 		pr_err("unable to map internal IRQ %d\n", 16);
 		/* Propagate the failure instead of reporting success */
 		err = irq[0];
 		goto out;
 	}
 	err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
 			"metagpmu0", (void *)0);
 	if (err) {
 		pr_err("unable to request IRQ%d for metag PMU counters\n",
 				irq[0]);
 		goto out;
 	}

 	irq[1] = internal_irq_map(17);
 	if (irq[1] < 0) {
 		pr_err("unable to map internal IRQ %d\n", 17);
 		/*
 		 * err is 0 here from the successful request above; without
 		 * this the caller would believe both IRQs are installed.
 		 */
 		err = irq[1];
 		goto out_irq1;
 	}
 	err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
 			"metagpmu1", (void *)1);
 	if (err) {
 		pr_err("unable to request IRQ%d for metag PMU counters\n",
 				irq[1]);
 		goto out_irq1;
 	}

 	return 0;

out_irq1:
 	/* Undo the counter 0 request so a failure leaves nothing behind */
 	free_irq(irq[0], (void *)0);
out:
 	return err;
 }

 /* PMU operations */
 /*
  * struct pmu .pmu_enable callback. Intentionally empty: this driver does
  * nothing at whole-PMU granularity; individual counters are switched on via
  * metag_pmu->enable() from metag_pmu_start().
  */
 static void metag_pmu_enable(struct pmu *pmu)
 {
 }

 /*
  * struct pmu .pmu_disable callback. Intentionally empty: this driver does
  * nothing at whole-PMU granularity; individual counters are switched off
  * via metag_pmu->disable() from metag_pmu_stop() and metag_pmu_add().
  */
 static void metag_pmu_disable(struct pmu *pmu)
 {
 }

 /*
  * struct pmu .event_init callback: validate a new event and take a
  * reference on the PMU hardware for it.
  *
  * Returns 0 on success, -ENODEV if no PMU was detected, -EOPNOTSUPP for
  * branch-stack sampling, -ENOENT for event types this PMU does not handle,
  * or the error from reserve_pmu_hardware()/_hw_perf_event_init().
  *
  * event->destroy is only installed once the hardware reference has really
  * been taken, so every failure return leaves the active_events count
  * exactly as it was on entry.
  */
 static int metag_pmu_event_init(struct perf_event *event)
 {
 	atomic_t *active_events;
 	int err = 0;

 	/* Check before touching metag_pmu: it is NULL on unsupported cores */
 	if (!metag_pmu_initialised())
 		return -ENODEV;

 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;

 	/*
 	 * Hardware and caches counters. Reject foreign event types before
 	 * taking any reference, otherwise the -ENOENT return would leak it.
 	 */
 	switch (event->attr.type) {
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HW_CACHE:
 	case PERF_TYPE_RAW:
 		break;

 	default:
 		return -ENOENT;
 	}

 	active_events = &metag_pmu->active_events;

 	/* The first user reserves the hardware; later users just count up */
 	if (!atomic_inc_not_zero(active_events)) {
 		mutex_lock(&metag_pmu->reserve_mutex);
 		if (atomic_read(active_events) == 0)
 			err = reserve_pmu_hardware();

 		if (!err)
 			atomic_inc(active_events);

 		mutex_unlock(&metag_pmu->reserve_mutex);
 	}

 	/* No reference was taken, so there is nothing to undo */
 	if (err)
 		return err;

 	event->destroy = _hw_perf_event_destroy;

 	err = _hw_perf_event_init(event);
 	if (err)
 		event->destroy(event);

 	return err;
 }

 /*
  * Fold the hardware counter's progress since the last snapshot into the
  * generic event count.
  *
  * @event: event whose ->count is advanced
  * @hwc:   hardware state holding the previous raw snapshot and period_left
  * @idx:   counter index handed to metag_pmu->read()
  */
 void metag_pmu_event_update(struct perf_event *event,
 		struct hw_perf_event *hwc, int idx)
 {
 	u64 old_raw, cur_raw;
 	s64 diff;

 	/*
 	 * The stored snapshot may be changed underneath us (e.g. when the
 	 * counter is chained), so keep re-reading until the exchange of the
 	 * snapshot for the fresh hardware value succeeds against the value
 	 * we sampled. Without interrupts, this is the simplest approach.
 	 */
 	do {
 		old_raw = local64_read(&hwc->prev_count);
 		cur_raw = metag_pmu->read(idx);
 	} while (local64_cmpxchg(&hwc->prev_count, old_raw,
 			cur_raw) != old_raw);

 	/* Wrap the difference to the counter width before accounting it */
 	diff = (cur_raw - old_raw) & MAX_PERIOD;

 	local64_add(diff, &event->count);
 	local64_sub(diff, &hwc->period_left);
 }

 int metag_pmu_event_set_period(struct perf_event *event,
 		struct hw_perf_event *hwc, int idx)
 {
 	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;

 	/* The period may have been changed */
 	if (unlikely(period != hwc->last_period))
 		left += period - hwc->last_period;

 	if (unlikely(left <= -period)) {
 		left = period;
 		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}

 	if (unlikely(left <= 0)) {
 		left += period;
 		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}

 	if (left > (s64)metag_pmu->max_period)
 		left = metag_pmu->max_period;

 	if (metag_pmu->write) {
 		local64_set(&hwc->prev_count, -(s32)left);
 		metag_pmu->write(idx, -left & MAX_PERIOD);
 	}

 	perf_event_update_userpage(event);

 	return ret;
 }

 /*
  * struct pmu .start callback: (re)arm the event's period, record the event
  * against its counter on this CPU and enable the counter.
  */
 static void metag_pmu_start(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;

 	/* An event without a counter assigned cannot be started */
 	if (WARN_ON_ONCE(idx == -1))
 		return;

 	/*
 	 * The period is always reprogrammed below, so PERF_EF_RELOAD itself
 	 * is ignored; it only triggers a sanity check that the count was
 	 * brought up to date when the event was stopped.
 	 */
 	if (flags & PERF_EF_RELOAD)
 		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

 	hwc->state = 0;

 	/*
 	 * Some counters are core global and cannot really be stopped; for
 	 * those, 'stopping' only disabled the IRQ. Resetting the period here
 	 * avoids an overflow arriving too early, or too late when it
 	 * happened while the event was stopped. Cores without the overflow
 	 * interrupt (max_period of 0) skip this step.
 	 */
 	if (metag_pmu->max_period)
 		metag_pmu_event_set_period(event, hwc, hwc->idx);

 	cpuc->events[idx] = event;
 	metag_pmu->enable(hwc, idx);
 }

 /*
  * struct pmu .stop callback: bring the count up to date and disable the
  * counter. Does nothing if the event is already marked stopped. @flags is
  * not consulted: the count is always updated on stop.
  */
 static void metag_pmu_stop(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;

 	if (hwc->state & PERF_HES_STOPPED)
 		return;

 	/* Read the count before disabling the counter */
 	metag_pmu_event_update(event, hwc, hwc->idx);
 	metag_pmu->disable(hwc, hwc->idx);
 	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 }

 /*
  * struct pmu .add callback: assign the event to a counter on this CPU.
  *
  * Events configured as 0x100 (instruction count) can only use the
  * dedicated METAG_INST_COUNTER slot; everything else takes the first free
  * counter below it. If PERF_EF_START is set in @flags the event is started
  * as well.
  *
  * Returns 0 on success or -EAGAIN when no suitable counter is free.
  */
 static int metag_pmu_add(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = 0, ret = 0;

 	perf_pmu_disable(event->pmu);

 	/* check whether we're counting instructions */
 	if (hwc->config == 0x100) {
 		if (__test_and_set_bit(METAG_INST_COUNTER,
 				cpuc->used_mask)) {
 			ret = -EAGAIN;
 			goto out;
 		}
 		idx = METAG_INST_COUNTER;
 	} else {
 		/*
 		 * Check whether we have a spare counter. The search is
 		 * bounded by the number of general-purpose counters (the
 		 * slots below METAG_INST_COUNTER), not by how many events
 		 * happen to exist system-wide.
 		 */
 		idx = find_first_zero_bit(cpuc->used_mask,
 				METAG_INST_COUNTER);
 		if (idx >= METAG_INST_COUNTER) {
 			ret = -EAGAIN;
 			goto out;
 		}

 		__set_bit(idx, cpuc->used_mask);
 	}
 	hwc->idx = idx;

 	/* Make sure the counter is disabled */
 	metag_pmu->disable(hwc, idx);

 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 	if (flags & PERF_EF_START)
 		metag_pmu_start(event, PERF_EF_RELOAD);

 	perf_event_update_userpage(event);
out:
 	perf_pmu_enable(event->pmu);
 	return ret;
 }

 /*
  * struct pmu .del callback: stop the event and give its counter back to
  * this CPU's pool.
  *
  * An event that never had a counter assigned (idx < 0) triggers a warning
  * and is left alone, rather than being used to index the per-CPU arrays.
  */
 static void metag_pmu_del(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;

 	/* A negative index would corrupt memory before events[]/used_mask */
 	if (WARN_ON(idx < 0))
 		return;

 	metag_pmu_stop(event, PERF_EF_UPDATE);
 	cpuc->events[idx] = NULL;
 	__clear_bit(idx, cpuc->used_mask);

 	perf_event_update_userpage(event);
 }

 /*
  * struct pmu .read callback: refresh the event's count from the hardware.
  * Events without a counter assigned (negative index) are skipped.
  */
 static void metag_pmu_read(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;

 	if (hwc->idx >= 0)
 		metag_pmu_event_update(event, hwc, hwc->idx);
 }

 /*
  * Callback table for the perf core. This is the object passed to
  * perf_pmu_register() in init_hw_perf_events(); a copy of it is also stored
  * in metag_pmu->pmu there.
  */
 static struct pmu pmu = {
 	/* Whole-PMU enable/disable: both are no-ops in this driver */
 	.pmu_enable	= metag_pmu_enable,
 	.pmu_disable	= metag_pmu_disable,

 	/* Event validation and hardware reservation */
 	.event_init	= metag_pmu_event_init,

 	/* Per-event counter assignment, control and read-out */
 	.add		= metag_pmu_add,
 	.del		= metag_pmu_del,
 	.start		= metag_pmu_start,
 	.stop		= metag_pmu_stop,
 	.read		= metag_pmu_read,
 };

 /* Core counter specific functions */
 /*
  * Generic PERF_COUNT_HW_* id -> hardware event encoding, looked up through
  * metag_pmu_event_map(). An entry of -1 marks an event with no mapping;
  * _hw_perf_event_init() rejects it with -EINVAL. The 0x100 encoding is the
  * one metag_pmu_add() routes to the dedicated instruction counter.
  */
 static const int metag_general_events[] = {
 	[PERF_COUNT_HW_CPU_CYCLES] = 0x03,
 	[PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
 	[PERF_COUNT_HW_CACHE_REFERENCES] = -1,
 	[PERF_COUNT_HW_CACHE_MISSES] = -1,
 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
 	[PERF_COUNT_HW_BRANCH_MISSES] = -1,
 	[PERF_COUNT_HW_BUS_CYCLES] = -1,
 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
 	[PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
 };

 /*
  * Cache event encodings, indexed [cache type][operation][result] and
  * consumed by _hw_perf_cache_event(). Combinations the hardware cannot
  * count are marked CACHE_OP_UNSUPPORTED (defined outside this file);
  * _hw_perf_cache_event() turns a stored 0 into -EOPNOTSUPP and a stored -1
  * into -EINVAL.
  */
 static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(L1D)] = {
 		[C(OP_READ)] = {
 			[C(RESULT_ACCESS)] = 0x08,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 		[C(OP_WRITE)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 		[C(OP_PREFETCH)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 	},
 	[C(L1I)] = {
 		[C(OP_READ)] = {
 			[C(RESULT_ACCESS)] = 0x09,
 			[C(RESULT_MISS)] = 0x0a,
 		},
 		[C(OP_WRITE)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 		[C(OP_PREFETCH)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 	},
 	[C(LL)] = {
 		[C(OP_READ)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 		[C(OP_WRITE)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 		[C(OP_PREFETCH)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 	},
 	[C(DTLB)] = {
 		[C(OP_READ)] = {
 			[C(RESULT_ACCESS)] = 0xd0,
 			[C(RESULT_MISS)] = 0xd2,
 		},
 		[C(OP_WRITE)] = {
 			[C(RESULT_ACCESS)] = 0xd4,
 			[C(RESULT_MISS)] = 0xd5,
 		},
 		[C(OP_PREFETCH)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 	},
 	[C(ITLB)] = {
 		[C(OP_READ)] = {
 			[C(RESULT_ACCESS)] = 0xd1,
 			[C(RESULT_MISS)] = 0xd3,
 		},
 		[C(OP_WRITE)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 		[C(OP_PREFETCH)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 	},
 	[C(BPU)] = {
 		[C(OP_READ)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 		[C(OP_WRITE)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 		[C(OP_PREFETCH)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 	},
 	[C(NODE)] = {
 		[C(OP_READ)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 		[C(OP_WRITE)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 		[C(OP_PREFETCH)] = {
 			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
 		},
 	},
 };


 /*
  * event->destroy hook: drop this event's reference on the PMU hardware and
  * release the overflow interrupts when the last reference goes away.
  */
 static void _hw_perf_event_destroy(struct perf_event *event)
 {
 	struct mutex *lock = &metag_pmu->reserve_mutex;

 	/* Only the final decrement returns with the mutex held */
 	if (!atomic_dec_and_mutex_lock(&metag_pmu->active_events, lock))
 		return;

 	release_pmu_hardware();
 	mutex_unlock(lock);
 }

 /*
  * Translate a PERF_TYPE_HW_CACHE config into a hardware event encoding.
  *
  * @config: packed as type (bits 0-7), op (bits 8-15), result (bits 16-23)
  * @evp:    receives the encoding on success, untouched otherwise
  *
  * Returns 0 on success, -EINVAL when the PMU has no cache table, a field is
  * out of range or the table entry is -1, and -EOPNOTSUPP when the table
  * entry is 0.
  */
 static int _hw_perf_cache_event(int config, int *evp)
 {
 	unsigned long cache_type, cache_op, cache_result;
 	int code;

 	if (!metag_pmu->cache_events)
 		return -EINVAL;

 	/* Unpack config */
 	cache_type = config & 0xff;
 	cache_op = (config >> 8) & 0xff;
 	cache_result = (config >> 16) & 0xff;

 	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
 		return -EINVAL;
 	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
 		return -EINVAL;
 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return -EINVAL;

 	code = (*metag_pmu->cache_events)[cache_type][cache_op][cache_result];
 	switch (code) {
 	case 0:
 		return -EOPNOTSUPP;
 	case -1:
 		return -EINVAL;
 	default:
 		*evp = code;
 		return 0;
 	}
 }

 /*
  * Resolve the event's attr.type/attr.config into a hardware encoding and
  * prime the hardware-event state.
  *
  * Returns 0 on success, -EINVAL for an out-of-range or unmapped event, or
  * the error from _hw_perf_cache_event() for cache events.
  */
 static int _hw_perf_event_init(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct perf_event_attr *attr = &event->attr;
 	int code = 0, err;

 	switch (attr->type) {
 	case PERF_TYPE_HARDWARE:
 		if (attr->config >= PERF_COUNT_HW_MAX)
 			return -EINVAL;

 		code = metag_pmu->event_map(attr->config);
 		break;

 	case PERF_TYPE_HW_CACHE:
 		err = _hw_perf_cache_event(attr->config, &code);
 		if (err)
 			return err;
 		break;

 	case PERF_TYPE_RAW:
 		/* Raw events supply the hardware encoding directly */
 		code = attr->config;
 		break;
 	}

 	/* Return early if the event is unsupported */
 	if (code == -1)
 		return -EINVAL;

 	/*
 	 * No counter is chosen yet: -1 means "still undecided". On SMP
 	 * systems each core has its own set of counters, so constraint
 	 * checking has to wait until the event is placed into the hardware.
 	 */
 	hwc->idx = -1;

 	/* Store the event encoding */
 	hwc->config |= (unsigned long)code;

 	/*
 	 * For non-sampling runs, limit the sample_period to half of the
 	 * counter width, so the new counter value is less likely to overtake
 	 * the previous one (unless there are IRQ latency issues...).
 	 */
 	if (metag_pmu->max_period && !hwc->sample_period) {
 		hwc->sample_period = metag_pmu->max_period >> 1;
 		hwc->last_period = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
 	}

 	return 0;
 }

 /*
  * metag_pmu->enable hook: start counter @idx counting the event encoded in
  * @event->config.
  *
  * For the instruction counter slot only the software snapshot (prev_count)
  * is refreshed from TXTACTCYC; no PERF_COUNT register is written. For the
  * other counters the event nibble goes into bits 28-31 of PERF_COUNT(idx)
  * and a per-thread bit (1 << 24 shifted by hard_processor_id()) is set.
  * The whole sequence runs under this CPU's pmu_lock with interrupts off.
  */
 static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
 {
 	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
 	unsigned int config = event->config;
 	unsigned int tmp = config & 0xf0;
 	unsigned long flags;

 	raw_spin_lock_irqsave(&events->pmu_lock, flags);

 	/*
 	 * Check if we're enabling the instruction counter (index of
 	 * MAX_HWEVENTS - 1)
 	 */
 	if (METAG_INST_COUNTER == idx) {
 		WARN_ONCE((config != 0x100),
 			"invalid configuration (%d) for counter (%d)\n",
 			config, idx);
 		local64_set(&event->prev_count, __core_reg_get(TXTACTCYC));
 		goto unlock;
 	}

 	/* Check for a core internal or performance channel event. */
 	if (tmp) {
 		void *perf_addr;

 		/*
 		 * Anything other than a cycle count will write the low-
 		 * nibble to the correct counter register.
 		 */
 		switch (tmp) {
 		case 0xd0:
 			perf_addr = (void *)PERF_ICORE(idx);
 			break;

 		case 0xf0:
 			perf_addr = (void *)PERF_CHAN(idx);
 			break;

 		default:
 			/* Other high nibbles have no sub-event register */
 			perf_addr = NULL;
 			break;
 		}

 		if (perf_addr)
 			metag_out32((config & 0x0f), perf_addr);

 		/*
 		 * Now we use the high nibble as the performance event to
 		 * count.
 		 */
 		config = tmp >> 4;
 	}

 	/* Event nibble in the top four bits, thread bit from bit 24 up */
 	tmp = ((config & 0xf) << 28) |
 			((1 << 24) << hard_processor_id());
 	if (metag_pmu->max_period)
 		/*
 		 * Cores supporting overflow interrupts may have had the counter
 		 * set to a specific value that needs preserving.
 		 */
 		tmp |= metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
 	else
 		/*
 		 * Older cores reset the counter on write, so prev_count needs
 		 * resetting too so we can calculate a correct delta.
 		 */
 		local64_set(&event->prev_count, 0);

 	metag_out32(tmp, PERF_COUNT(idx));
unlock:
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }

 /*
  * metag_pmu->disable hook: stop counter @idx from counting while keeping
  * its current 24-bit value.
  *
  * Nothing is done for the METAG_INST_COUNTER slot: per the read/enable
  * paths it is backed by a thread register rather than a PERF_COUNT
  * register, so there is nothing to switch off here.
  */
 static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	unsigned long irqflags;
 	unsigned int reg;

 	if (idx == METAG_INST_COUNTER)
 		return;

 	/*
 	 * The value should already have been read by the caller: on early
 	 * cores this write resets the counter to 0. On newer cores the
 	 * count is read back and rewritten so the overflow interrupt still
 	 * sees it. Both the thread id mask AND the event nibble are cleared,
 	 * because at least two events are core global and ignore the thread
 	 * id mask. This only works because thread performance counts are
 	 * not mixed, and event 0x00 requires a thread id mask!
 	 */
 	raw_spin_lock_irqsave(&cpuc->pmu_lock, irqflags);

 	reg = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
 	metag_out32(reg, PERF_COUNT(idx));

 	raw_spin_unlock_irqrestore(&cpuc->pmu_lock, irqflags);
 }

 /*
  * metag_pmu->read hook: return the raw value of counter @idx. The
  * instruction counter slot is read from TXTACTCYC; any other counter is
  * the low 24 bits of its PERF_COUNT register.
  */
 static u64 metag_pmu_read_counter(int idx)
 {
 	u32 raw;

 	if (idx == METAG_INST_COUNTER)
 		raw = __core_reg_get(TXTACTCYC);
 	else
 		raw = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;

 	return raw;
 }

 /*
  * metag_pmu->write hook: load the low 24 bits of @val into counter @idx,
  * leaving the top byte of PERF_COUNT(idx) (thread mask and event id)
  * as it is.
  */
 static void metag_pmu_write_counter(int idx, u32 val)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	unsigned long irqflags;
 	u32 control;

 	/*
 	 * This _shouldn't_ happen, but if it does the write is simply
 	 * dropped, as the register is read-only and clear-on-write.
 	 */
 	if (idx == METAG_INST_COUNTER)
 		return;

 	raw_spin_lock_irqsave(&cpuc->pmu_lock, irqflags);

 	/* Keep the control byte, replace only the 24-bit count */
 	control = metag_in32(PERF_COUNT(idx)) & 0xff000000;
 	metag_out32(control | (val & 0x00ffffff), PERF_COUNT(idx));

 	raw_spin_unlock_irqrestore(&cpuc->pmu_lock, irqflags);
 }

 /*
  * metag_pmu->event_map hook: look up the hardware encoding for generic
  * event @idx in metag_general_events[]. No range check is done here; the
  * caller in _hw_perf_event_init() validates the index first.
  */
 static int metag_pmu_event_map(int idx)
 {
 	const int hw_code = metag_general_events[idx];

 	return hw_code;
 }

 /*
  * Overflow interrupt handler for performance counters 0 and 1.
  *
  * @irq: Linux IRQ number (unused)
  * @dev: cookie passed to request_irq(): the counter index, 0 or 1
  *
  * The counter is stopped first so it cannot raise a further interrupt.
  * The event count is then updated and the next period programmed, and the
  * counter is re-enabled unless perf_event_overflow() asks for the event to
  * be throttled. If no event currently owns the counter, the counter is
  * simply left stopped.
  *
  * Always returns IRQ_HANDLED.
  */
 static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
 {
 	int idx = (int)dev;
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 	struct perf_event *event = cpuhw->events[idx];
 	struct hw_perf_event *hwc;
 	struct pt_regs *regs = get_irq_regs();
 	struct perf_sample_data sampledata;
 	unsigned long flags;
 	u32 counter = 0;

 	/*
 	 * We need to stop the core temporarily from generating another
 	 * interrupt while we disable this counter. However, we don't want
 	 * to flag the counter as free
 	 */
 	__global_lock2(flags);
 	counter = metag_in32(PERF_COUNT(idx));
 	metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
 	__global_unlock2(flags);

 	/*
 	 * metag_pmu_del() clears events[idx]; an interrupt arriving after
 	 * that has no event to account to, so stop here with the counter
 	 * left disabled instead of dereferencing a NULL event.
 	 */
 	if (unlikely(!event))
 		return IRQ_HANDLED;

 	hwc = &event->hw;

 	/* Update the counts and reset the sample period */
 	metag_pmu_event_update(event, hwc, idx);
 	perf_sample_data_init(&sampledata, 0, hwc->last_period);
 	metag_pmu_event_set_period(event, hwc, idx);

 	/*
 	 * Enable the counter again once core overflow processing has
 	 * completed. Note the counter value may have been modified while it was
 	 * inactive to set it up ready for the next interrupt.
 	 */
 	if (!perf_event_overflow(event, &sampledata, regs)) {
 		__global_lock2(flags);
 		/* Saved control byte plus whatever count is now programmed */
 		counter = (counter & 0xff000000) |
 			  (metag_in32(PERF_COUNT(idx)) & 0x00ffffff);
 		metag_out32(counter, PERF_COUNT(idx));
 		__global_unlock2(flags);
 	}

 	return IRQ_HANDLED;
 }

 /*
  * PMU description selected by init_hw_perf_events() for Meta 2 cores.
  * That function clears handle_irq, write and max_period again on cores
  * older than revision 0x0104, which lack overflow interrupts.
  */
 static struct metag_pmu _metag_pmu = {
 	.handle_irq	= metag_pmu_counter_overflow,
 	.enable		= metag_pmu_enable_counter,
 	.disable	= metag_pmu_disable_counter,
 	.read		= metag_pmu_read_counter,
 	.write		= metag_pmu_write_counter,
 	.event_map	= metag_pmu_event_map,
 	.cache_events	= &metag_pmu_cache_events,
 	.max_period	= MAX_PERIOD,
 	.max_events	= MAX_HWEVENTS,
 };

 /* PMU CPU hotplug notifier */
 /*
  * CPU hotplug callback: when a CPU reaches CPU_STARTING (with or without
  * the CPU_TASKS_FROZEN modifier), wipe its per-CPU counter bookkeeping and
  * re-initialise its lock. Every other action is ignored.
  */
 static int metag_pmu_cpu_notify(struct notifier_block *b, unsigned long action,
 				void *hcpu)
 {
 	unsigned int cpu = (unsigned int)hcpu;
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_STARTING:
 		memset(cpuc, 0, sizeof(struct cpu_hw_events));
 		raw_spin_lock_init(&cpuc->pmu_lock);
 		return NOTIFY_OK;

 	default:
 		return NOTIFY_DONE;
 	}
 }

 /* Hotplug notifier registered from init_hw_perf_events() */
 static struct notifier_block metag_pmu_notifier = {
 	.notifier_call = metag_pmu_cpu_notify,
 };

 /* PMU Initialisation */
 /*
  * Detect the core from METAC_ID and, for Meta 2 cores, set up and register
  * the PMU with the perf core.
  *
  * Any core whose major id is not 2 is reported as unsupported and the
  * function returns 0 without registering anything; metag_pmu stays NULL in
  * that case. Otherwise returns the result of perf_pmu_register().
  */
 static int __init init_hw_perf_events(void)
 {
 	int ret = 0, cpu;
 	u32 version = *(u32 *)METAC_ID;
 	int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
 	int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
 			>> METAC_ID_REV_S;

 	/*
 	 * Not a Meta 2 core, then not supported. This also covers majors
 	 * above 2, which would otherwise fall through with metag_pmu still
 	 * NULL and crash on the first dereference below.
 	 */
 	if (0x02 != major) {
 		pr_info("no hardware counter support available\n");
 		goto out;
 	}

 	metag_pmu = &_metag_pmu;

 	if (min_rev < 0x0104) {
 		/*
 		 * A core without overflow interrupts, and clear-on-
 		 * write counters.
 		 */
 		metag_pmu->handle_irq = NULL;
 		metag_pmu->write = NULL;
 		metag_pmu->max_period = 0;
 	}

 	metag_pmu->name = "meta2";
 	metag_pmu->version = version;

 	/*
 	 * Early cores have "limited" counters - they have no overflow
 	 * interrupts - and so are unable to do sampling without extra work
 	 * and timer assistance. The flag must be set on the file-level pmu,
 	 * since that is the object registered with the perf core below.
 	 */
 	if (metag_pmu->max_period == 0)
 		pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

 	/* Copy only once the capabilities are final, so both agree */
 	metag_pmu->pmu = pmu;

 	pr_info("enabled with %s PMU driver, %d counters available\n",
 			metag_pmu->name, metag_pmu->max_events);

 	/* Initialise the active events and reservation mutex */
 	atomic_set(&metag_pmu->active_events, 0);
 	mutex_init(&metag_pmu->reserve_mutex);

 	/* Clear the counters */
 	metag_out32(0, PERF_COUNT(0));
 	metag_out32(0, PERF_COUNT(1));

 	for_each_possible_cpu(cpu) {
 		struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

 		memset(cpuc, 0, sizeof(struct cpu_hw_events));
 		raw_spin_lock_init(&cpuc->pmu_lock);
 	}

 	register_cpu_notifier(&metag_pmu_notifier);
 	ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW);
out:
 	return ret;
 }
 early_initcall(init_hw_perf_events);