hwlat: drop related patches
diff --git a/patches/hwlat-detector-Don-t-ignore-threshold-module-paramet.patch b/patches/hwlat-detector-Don-t-ignore-threshold-module-paramet.patch
deleted file mode 100644
index e4cba99..0000000
--- a/patches/hwlat-detector-Don-t-ignore-threshold-module-paramet.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From: Mike Galbraith <bitbucket@online.de>
-Date: Fri, 30 Aug 2013 07:57:25 +0200
-Subject: hwlat-detector: Don't ignore threshold module parameter
-
-If the user specified a threshold at module load time, use it.
-
-
-Acked-by: Steven Rostedt <rostedt@goodmis.org>
-Signed-off-by: Mike Galbraith <bitbucket@online.de>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- drivers/misc/hwlat_detector.c |    2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/drivers/misc/hwlat_detector.c
-+++ b/drivers/misc/hwlat_detector.c
-@@ -414,7 +414,7 @@ static int init_stats(void)
- 		goto out;
- 
- 	__reset_stats();
--	data.threshold = DEFAULT_LAT_THRESHOLD;	    /* threshold us */
-+	data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
- 	data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
- 	data.sample_width = DEFAULT_SAMPLE_WIDTH;   /* width us */
- 
diff --git a/patches/hwlat-detector-Update-hwlat_detector-to-add-outer-lo.patch b/patches/hwlat-detector-Update-hwlat_detector-to-add-outer-lo.patch
deleted file mode 100644
index 58f97a8..0000000
--- a/patches/hwlat-detector-Update-hwlat_detector-to-add-outer-lo.patch
+++ /dev/null
@@ -1,125 +0,0 @@
-From: Steven Rostedt <rostedt@goodmis.org>
-Date: Mon, 19 Aug 2013 17:33:25 -0400
-Subject: hwlat-detector: Update hwlat_detector to add outer loop detection
-
-The hwlat_detector reads two timestamps in a row, then reports any
-gap between those calls. The problem is, it misses everything between
-the second reading of the time stamp to the first reading of the time stamp
-in the next loop. That's were most of the time is spent, which means,
-chances are likely that it will miss all hardware latencies. This
-defeats the purpose.
-
-By also testing the first time stamp from the previous loop second
-time stamp (the outer loop), we are more likely to find a latency.
-
-Setting the threshold to 1, here's what the report now looks like:
-
-1347415723.0232202770	0	2
-1347415725.0234202822	0	2
-1347415727.0236202875	0	2
-1347415729.0238202928	0	2
-1347415731.0240202980	0	2
-1347415734.0243203061	0	2
-1347415736.0245203113	0	2
-1347415738.0247203166	2	0
-1347415740.0249203219	0	3
-1347415742.0251203272	0	3
-1347415743.0252203299	0	3
-1347415745.0254203351	0	2
-1347415747.0256203404	0	2
-1347415749.0258203457	0	2
-1347415751.0260203510	0	2
-1347415754.0263203589	0	2
-1347415756.0265203642	0	2
-1347415758.0267203695	0	2
-1347415760.0269203748	0	2
-1347415762.0271203801	0	2
-1347415764.0273203853	2	0
-
-There's some hardware latency that takes 2 microseconds to run.
-
-Signed-off-by: Steven Rostedt <srostedt@redhat.com>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- drivers/misc/hwlat_detector.c |   32 ++++++++++++++++++++++++++------
- 1 file changed, 26 insertions(+), 6 deletions(-)
-
---- a/drivers/misc/hwlat_detector.c
-+++ b/drivers/misc/hwlat_detector.c
-@@ -143,6 +143,7 @@ static void detector_exit(void);
- struct sample {
- 	u64		seqnum;		/* unique sequence */
- 	u64		duration;	/* ktime delta */
-+	u64		outer_duration;	/* ktime delta (outer loop) */
- 	struct timespec	timestamp;	/* wall time */
- 	unsigned long   lost;
- };
-@@ -219,11 +220,13 @@ static struct sample *buffer_get_sample(
-  */
- static int get_sample(void *unused)
- {
--	ktime_t start, t1, t2;
-+	ktime_t start, t1, t2, last_t2;
- 	s64 diff, total = 0;
- 	u64 sample = 0;
-+	u64 outer_sample = 0;
- 	int ret = 1;
- 
-+	last_t2.tv64 = 0;
- 	start = ktime_get(); /* start timestamp */
- 
- 	do {
-@@ -231,7 +234,22 @@ static int get_sample(void *unused)
- 		t1 = ktime_get();	/* we'll look for a discontinuity */
- 		t2 = ktime_get();
- 
-+		if (last_t2.tv64) {
-+			/* Check the delta from outer loop (t2 to next t1) */
-+			diff = ktime_to_us(ktime_sub(t1, last_t2));
-+			/* This shouldn't happen */
-+			if (diff < 0) {
-+				pr_err(BANNER "time running backwards\n");
-+				goto out;
-+			}
-+			if (diff > outer_sample)
-+				outer_sample = diff;
-+		}
-+		last_t2 = t2;
-+
- 		total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
-+
-+		/* This checks the inner loop (t1 to t2) */
- 		diff = ktime_to_us(ktime_sub(t2, t1));     /* current diff */
- 
- 		/* This shouldn't happen */
-@@ -246,12 +264,13 @@ static int get_sample(void *unused)
- 	} while (total <= data.sample_width);
- 
- 	/* If we exceed the threshold value, we have found a hardware latency */
--	if (sample > data.threshold) {
-+	if (sample > data.threshold || outer_sample > data.threshold) {
- 		struct sample s;
- 
- 		data.count++;
- 		s.seqnum = data.count;
- 		s.duration = sample;
-+		s.outer_duration = outer_sample;
- 		s.timestamp = CURRENT_TIME;
- 		__buffer_add_sample(&s);
- 
-@@ -738,10 +757,11 @@ static ssize_t debug_sample_fread(struct
- 		}
- 	}
- 
--	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
--		      sample->timestamp.tv_sec,
--		      sample->timestamp.tv_nsec,
--		      sample->duration);
-+	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
-+		       sample->timestamp.tv_sec,
-+		       sample->timestamp.tv_nsec,
-+		       sample->duration,
-+		       sample->outer_duration);
- 
- 
- 	/* handling partial reads is more trouble than it's worth */
diff --git a/patches/hwlat-detector-Use-thread-instead-of-stop-machine.patch b/patches/hwlat-detector-Use-thread-instead-of-stop-machine.patch
deleted file mode 100644
index fe1a435..0000000
--- a/patches/hwlat-detector-Use-thread-instead-of-stop-machine.patch
+++ /dev/null
@@ -1,183 +0,0 @@
-From: Steven Rostedt <rostedt@goodmis.org>
-Date: Mon, 19 Aug 2013 17:33:27 -0400
-Subject: hwlat-detector: Use thread instead of stop machine
-
-There's no reason to use stop machine to search for hardware latency.
-Simply disabling interrupts while running the loop will do enough to
-check if something comes in that wasn't disabled by interrupts being
-off, which is exactly what stop machine does.
-
-Instead of using stop machine, just have the thread disable interrupts
-while it checks for hardware latency.
-
-Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- drivers/misc/hwlat_detector.c |   60 ++++++++++++++++++------------------------
- 1 file changed, 26 insertions(+), 34 deletions(-)
-
---- a/drivers/misc/hwlat_detector.c
-+++ b/drivers/misc/hwlat_detector.c
-@@ -41,7 +41,6 @@
- #include <linux/module.h>
- #include <linux/init.h>
- #include <linux/ring_buffer.h>
--#include <linux/stop_machine.h>
- #include <linux/time.h>
- #include <linux/hrtimer.h>
- #include <linux/kthread.h>
-@@ -107,7 +106,6 @@ struct data;					/* Global state */
- /* Sampling functions */
- static int __buffer_add_sample(struct sample *sample);
- static struct sample *buffer_get_sample(struct sample *sample);
--static int get_sample(void *unused);
- 
- /* Threading and state */
- static int kthread_fn(void *unused);
-@@ -149,7 +147,7 @@ struct sample {
- 	unsigned long   lost;
- };
- 
--/* keep the global state somewhere. Mostly used under stop_machine. */
-+/* keep the global state somewhere. */
- static struct data {
- 
- 	struct mutex lock;		/* protect changes */
-@@ -172,7 +170,7 @@ static struct data {
-  * @sample: The new latency sample value
-  *
-  * This receives a new latency sample and records it in a global ring buffer.
-- * No additional locking is used in this case - suited for stop_machine use.
-+ * No additional locking is used in this case.
-  */
- static int __buffer_add_sample(struct sample *sample)
- {
-@@ -229,18 +227,18 @@ static struct sample *buffer_get_sample(
- #endif
- /**
-  * get_sample - sample the CPU TSC and look for likely hardware latencies
-- * @unused: This is not used but is a part of the stop_machine API
-  *
-  * Used to repeatedly capture the CPU TSC (or similar), looking for potential
-- * hardware-induced latency. Called under stop_machine, with data.lock held.
-+ * hardware-induced latency. Called with interrupts disabled and with
-+ * data.lock held.
-  */
--static int get_sample(void *unused)
-+static int get_sample(void)
- {
- 	time_type start, t1, t2, last_t2;
- 	s64 diff, total = 0;
- 	u64 sample = 0;
- 	u64 outer_sample = 0;
--	int ret = 1;
-+	int ret = -1;
- 
- 	init_time(last_t2, 0);
- 	start = time_get(); /* start timestamp */
-@@ -279,10 +277,14 @@ static int get_sample(void *unused)
- 
- 	} while (total <= data.sample_width);
- 
-+	ret = 0;
-+
- 	/* If we exceed the threshold value, we have found a hardware latency */
- 	if (sample > data.threshold || outer_sample > data.threshold) {
- 		struct sample s;
- 
-+		ret = 1;
-+
- 		data.count++;
- 		s.seqnum = data.count;
- 		s.duration = sample;
-@@ -295,7 +297,6 @@ static int get_sample(void *unused)
- 			data.max_sample = sample;
- 	}
- 
--	ret = 0;
- out:
- 	return ret;
- }
-@@ -305,32 +306,30 @@ static int get_sample(void *unused)
-  * @unused: A required part of the kthread API.
-  *
-  * Used to periodically sample the CPU TSC via a call to get_sample. We
-- * use stop_machine, whith does (intentionally) introduce latency since we
-+ * disable interrupts, which does (intentionally) introduce latency since we
-  * need to ensure nothing else might be running (and thus pre-empting).
-  * Obviously this should never be used in production environments.
-  *
-- * stop_machine will schedule us typically only on CPU0 which is fine for
-- * almost every real-world hardware latency situation - but we might later
-- * generalize this if we find there are any actualy systems with alternate
-- * SMI delivery or other non CPU0 hardware latencies.
-+ * Currently this runs on which ever CPU it was scheduled on, but most
-+ * real-worald hardware latency situations occur across several CPUs,
-+ * but we might later generalize this if we find there are any actualy
-+ * systems with alternate SMI delivery or other hardware latencies.
-  */
- static int kthread_fn(void *unused)
- {
--	int err = 0;
--	u64 interval = 0;
-+	int ret;
-+	u64 interval;
- 
- 	while (!kthread_should_stop()) {
- 
- 		mutex_lock(&data.lock);
- 
--		err = stop_machine(get_sample, unused, 0);
--		if (err) {
--			/* Houston, we have a problem */
--			mutex_unlock(&data.lock);
--			goto err_out;
--		}
-+		local_irq_disable();
-+		ret = get_sample();
-+		local_irq_enable();
- 
--		wake_up(&data.wq); /* wake up reader(s) */
-+		if (ret > 0)
-+			wake_up(&data.wq); /* wake up reader(s) */
- 
- 		interval = data.sample_window - data.sample_width;
- 		do_div(interval, USEC_PER_MSEC); /* modifies interval value */
-@@ -338,15 +337,10 @@ static int kthread_fn(void *unused)
- 		mutex_unlock(&data.lock);
- 
- 		if (msleep_interruptible(interval))
--			goto out;
-+			break;
- 	}
--		goto out;
--err_out:
--	pr_err(BANNER "could not call stop_machine, disabling\n");
--	enabled = 0;
--out:
--	return err;
- 
-+	return 0;
- }
- 
- /**
-@@ -442,8 +436,7 @@ static int init_stats(void)
-  * This function provides a generic read implementation for the global state
-  * "data" structure debugfs filesystem entries. It would be nice to use
-  * simple_attr_read directly, but we need to make sure that the data.lock
-- * spinlock is held during the actual read (even though we likely won't ever
-- * actually race here as the updater runs under a stop_machine context).
-+ * is held during the actual read.
-  */
- static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
- 				size_t cnt, loff_t *ppos, const u64 *entry)
-@@ -478,8 +471,7 @@ static ssize_t simple_data_read(struct f
-  * This function provides a generic write implementation for the global state
-  * "data" structure debugfs filesystem entries. It would be nice to use
-  * simple_attr_write directly, but we need to make sure that the data.lock
-- * spinlock is held during the actual write (even though we likely won't ever
-- * actually race here as the updater runs under a stop_machine context).
-+ * is held during the actual write.
-  */
- static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
- 				 size_t cnt, loff_t *ppos, u64 *entry)
diff --git a/patches/hwlat-detector-Use-trace_clock_local-if-available.patch b/patches/hwlat-detector-Use-trace_clock_local-if-available.patch
deleted file mode 100644
index a45adaa..0000000
--- a/patches/hwlat-detector-Use-trace_clock_local-if-available.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From: Steven Rostedt <rostedt@goodmis.org>
-Date: Mon, 19 Aug 2013 17:33:26 -0400
-Subject: hwlat-detector: Use trace_clock_local if available
-
-As ktime_get() calls into the timing code which does a read_seq(), it
-may be affected by other CPUS that touch that lock. To remove this
-dependency, use the trace_clock_local() which is already exported
-for module use. If CONFIG_TRACING is enabled, use that as the clock,
-otherwise use ktime_get().
-
-Signed-off-by: Steven Rostedt <srostedt@redhat.com>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- drivers/misc/hwlat_detector.c |   34 +++++++++++++++++++++++++---------
- 1 file changed, 25 insertions(+), 9 deletions(-)
-
---- a/drivers/misc/hwlat_detector.c
-+++ b/drivers/misc/hwlat_detector.c
-@@ -51,6 +51,7 @@
- #include <linux/version.h>
- #include <linux/delay.h>
- #include <linux/slab.h>
-+#include <linux/trace_clock.h>
- 
- #define BUF_SIZE_DEFAULT	262144UL		/* 8K*(sizeof(entry)) */
- #define BUF_FLAGS		(RB_FL_OVERWRITE)	/* no block on full */
-@@ -211,6 +212,21 @@ static struct sample *buffer_get_sample(
- 	return sample;
- }
- 
-+#ifndef CONFIG_TRACING
-+#define time_type	ktime_t
-+#define time_get()	ktime_get()
-+#define time_to_us(x)	ktime_to_us(x)
-+#define time_sub(a, b)	ktime_sub(a, b)
-+#define init_time(a, b)	(a).tv64 = b
-+#define time_u64(a)	((a).tv64)
-+#else
-+#define time_type	u64
-+#define time_get()	trace_clock_local()
-+#define time_to_us(x)	div_u64(x, 1000)
-+#define time_sub(a, b)	((a) - (b))
-+#define init_time(a, b)	(a = b)
-+#define time_u64(a)	a
-+#endif
- /**
-  * get_sample - sample the CPU TSC and look for likely hardware latencies
-  * @unused: This is not used but is a part of the stop_machine API
-@@ -220,23 +236,23 @@ static struct sample *buffer_get_sample(
-  */
- static int get_sample(void *unused)
- {
--	ktime_t start, t1, t2, last_t2;
-+	time_type start, t1, t2, last_t2;
- 	s64 diff, total = 0;
- 	u64 sample = 0;
- 	u64 outer_sample = 0;
- 	int ret = 1;
- 
--	last_t2.tv64 = 0;
--	start = ktime_get(); /* start timestamp */
-+	init_time(last_t2, 0);
-+	start = time_get(); /* start timestamp */
- 
- 	do {
- 
--		t1 = ktime_get();	/* we'll look for a discontinuity */
--		t2 = ktime_get();
-+		t1 = time_get();	/* we'll look for a discontinuity */
-+		t2 = time_get();
- 
--		if (last_t2.tv64) {
-+		if (time_u64(last_t2)) {
- 			/* Check the delta from outer loop (t2 to next t1) */
--			diff = ktime_to_us(ktime_sub(t1, last_t2));
-+			diff = time_to_us(time_sub(t1, last_t2));
- 			/* This shouldn't happen */
- 			if (diff < 0) {
- 				pr_err(BANNER "time running backwards\n");
-@@ -247,10 +263,10 @@ static int get_sample(void *unused)
- 		}
- 		last_t2 = t2;
- 
--		total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
-+		total = time_to_us(time_sub(t2, start)); /* sample width */
- 
- 		/* This checks the inner loop (t1 to t2) */
--		diff = ktime_to_us(ktime_sub(t2, t1));     /* current diff */
-+		diff = time_to_us(time_sub(t2, t1));     /* current diff */
- 
- 		/* This shouldn't happen */
- 		if (diff < 0) {
diff --git a/patches/hwlatdetect.patch b/patches/hwlatdetect.patch
deleted file mode 100644
index 93df5c5..0000000
--- a/patches/hwlatdetect.patch
+++ /dev/null
@@ -1,1347 +0,0 @@
-Subject: hwlatdetect.patch
-From: Carsten Emde <C.Emde@osadl.org>
-Date: Tue, 19 Jul 2011 13:53:12 +0100
-
-Jon Masters developed this wonderful SMI detector. For details please
-consult Documentation/hwlat_detector.txt. It could be ported to Linux
-3.0 RT without any major change.
-
-Signed-off-by: Carsten Emde <C.Emde@osadl.org>
-
----
- Documentation/hwlat_detector.txt |   64 ++
- drivers/misc/Kconfig             |   29 
- drivers/misc/Makefile            |    1 
- drivers/misc/hwlat_detector.c    | 1212 +++++++++++++++++++++++++++++++++++++++
- 4 files changed, 1306 insertions(+)
-
---- /dev/null
-+++ b/Documentation/hwlat_detector.txt
-@@ -0,0 +1,64 @@
-+Introduction:
-+-------------
-+
-+The module hwlat_detector is a special purpose kernel module that is used to
-+detect large system latencies induced by the behavior of certain underlying
-+hardware or firmware, independent of Linux itself. The code was developed
-+originally to detect SMIs (System Management Interrupts) on x86 systems,
-+however there is nothing x86 specific about this patchset. It was
-+originally written for use by the "RT" patch since the Real Time
-+kernel is highly latency sensitive.
-+
-+SMIs are usually not serviced by the Linux kernel, which typically does not
-+even know that they are occuring. SMIs are instead are set up by BIOS code
-+and are serviced by BIOS code, usually for "critical" events such as
-+management of thermal sensors and fans. Sometimes though, SMIs are used for
-+other tasks and those tasks can spend an inordinate amount of time in the
-+handler (sometimes measured in milliseconds). Obviously this is a problem if
-+you are trying to keep event service latencies down in the microsecond range.
-+
-+The hardware latency detector works by hogging all of the cpus for configurable
-+amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
-+for some period, then looking for gaps in the TSC data. Any gap indicates a
-+time when the polling was interrupted and since the machine is stopped and
-+interrupts turned off the only thing that could do that would be an SMI.
-+
-+Note that the SMI detector should *NEVER* be used in a production environment.
-+It is intended to be run manually to determine if the hardware platform has a
-+problem with long system firmware service routines.
-+
-+Usage:
-+------
-+
-+Loading the module hwlat_detector passing the parameter "enabled=1" (or by
-+setting the "enable" entry in "hwlat_detector" debugfs toggled on) is the only
-+step required to start the hwlat_detector. It is possible to redefine the
-+threshold in microseconds (us) above which latency spikes will be taken
-+into account (parameter "threshold=").
-+
-+Example:
-+
-+	# modprobe hwlat_detector enabled=1 threshold=100
-+
-+After the module is loaded, it creates a directory named "hwlat_detector" under
-+the debugfs mountpoint, "/debug/hwlat_detector" for this text. It is necessary
-+to have debugfs mounted, which might be on /sys/debug on your system.
-+
-+The /debug/hwlat_detector interface contains the following files:
-+
-+count			- number of latency spikes observed since last reset
-+enable			- a global enable/disable toggle (0/1), resets count
-+max			- maximum hardware latency actually observed (usecs)
-+sample			- a pipe from which to read current raw sample data
-+			  in the format <timestamp> <latency observed usecs>
-+			  (can be opened O_NONBLOCK for a single sample)
-+threshold		- minimum latency value to be considered (usecs)
-+width			- time period to sample with CPUs held (usecs)
-+			  must be less than the total window size (enforced)
-+window			- total period of sampling, width being inside (usecs)
-+
-+By default we will set width to 500,000 and window to 1,000,000, meaning that
-+we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
-+observe any latencies that exceed the threshold (initially 100 usecs),
-+then we write to a global sample ring buffer of 8K samples, which is
-+consumed by reading from the "sample" (pipe) debugfs file interface.
---- a/drivers/misc/Kconfig
-+++ b/drivers/misc/Kconfig
-@@ -122,6 +122,35 @@ config IBM_ASM
- 	  for information on the specific driver level and support statement
- 	  for your IBM server.
- 
-+config HWLAT_DETECTOR
-+	tristate "Testing module to detect hardware-induced latencies"
-+	depends on DEBUG_FS
-+	depends on RING_BUFFER
-+	default m
-+	---help---
-+	  A simple hardware latency detector. Use this module to detect
-+	  large latencies introduced by the behavior of the underlying
-+	  system firmware external to Linux. We do this using periodic
-+	  use of stop_machine to grab all available CPUs and measure
-+	  for unexplainable gaps in the CPU timestamp counter(s). By
-+	  default, the module is not enabled until the "enable" file
-+	  within the "hwlat_detector" debugfs directory is toggled.
-+
-+	  This module is often used to detect SMI (System Management
-+	  Interrupts) on x86 systems, though is not x86 specific. To
-+	  this end, we default to using a sample window of 1 second,
-+	  during which we will sample for 0.5 seconds. If an SMI or
-+	  similar event occurs during that time, it is recorded
-+	  into an 8K samples global ring buffer until retreived.
-+
-+	  WARNING: This software should never be enabled (it can be built
-+	  but should not be turned on after it is loaded) in a production
-+	  environment where high latencies are a concern since the
-+	  sampling mechanism actually introduces latencies for
-+	  regular tasks while the CPU(s) are being held.
-+
-+	  If unsure, say N
-+
- config PHANTOM
- 	tristate "Sensable PHANToM (PCI)"
- 	depends on PCI
---- a/drivers/misc/Makefile
-+++ b/drivers/misc/Makefile
-@@ -38,6 +38,7 @@ obj-$(CONFIG_C2PORT)		+= c2port/
- obj-$(CONFIG_HMC6352)		+= hmc6352.o
- obj-y				+= eeprom/
- obj-y				+= cb710/
-+obj-$(CONFIG_HWLAT_DETECTOR)	+= hwlat_detector.o
- obj-$(CONFIG_SPEAR13XX_PCIE_GADGET)	+= spear13xx_pcie_gadget.o
- obj-$(CONFIG_VMWARE_BALLOON)	+= vmw_balloon.o
- obj-$(CONFIG_ARM_CHARLCD)	+= arm-charlcd.o
---- /dev/null
-+++ b/drivers/misc/hwlat_detector.c
-@@ -0,0 +1,1212 @@
-+/*
-+ * hwlat_detector.c - A simple Hardware Latency detector.
-+ *
-+ * Use this module to detect large system latencies induced by the behavior of
-+ * certain underlying system hardware or firmware, independent of Linux itself.
-+ * The code was developed originally to detect the presence of SMIs on Intel
-+ * and AMD systems, although there is no dependency upon x86 herein.
-+ *
-+ * The classical example usage of this module is in detecting the presence of
-+ * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
-+ * somewhat special form of hardware interrupt spawned from earlier CPU debug
-+ * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
-+ * LPC (or other device) to generate a special interrupt under certain
-+ * circumstances, for example, upon expiration of a special SMI timer device,
-+ * due to certain external thermal readings, on certain I/O address accesses,
-+ * and other situations. An SMI hits a special CPU pin, triggers a special
-+ * SMI mode (complete with special memory map), and the OS is unaware.
-+ *
-+ * Although certain hardware-inducing latencies are necessary (for example,
-+ * a modern system often requires an SMI handler for correct thermal control
-+ * and remote management) they can wreak havoc upon any OS-level performance
-+ * guarantees toward low-latency, especially when the OS is not even made
-+ * aware of the presence of these interrupts. For this reason, we need a
-+ * somewhat brute force mechanism to detect these interrupts. In this case,
-+ * we do it by hogging all of the CPU(s) for configurable timer intervals,
-+ * sampling the built-in CPU timer, looking for discontiguous readings.
-+ *
-+ * WARNING: This implementation necessarily introduces latencies. Therefore,
-+ *          you should NEVER use this module in a production environment
-+ *          requiring any kind of low-latency performance guarantee(s).
-+ *
-+ * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
-+ *
-+ * Includes useful feedback from Clark Williams <clark@redhat.com>
-+ *
-+ * This file is licensed under the terms of the GNU General Public
-+ * License version 2. This program is licensed "as is" without any
-+ * warranty of any kind, whether express or implied.
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/ring_buffer.h>
-+#include <linux/stop_machine.h>
-+#include <linux/time.h>
-+#include <linux/hrtimer.h>
-+#include <linux/kthread.h>
-+#include <linux/debugfs.h>
-+#include <linux/seq_file.h>
-+#include <linux/uaccess.h>
-+#include <linux/version.h>
-+#include <linux/delay.h>
-+#include <linux/slab.h>
-+
-+#define BUF_SIZE_DEFAULT	262144UL		/* 8K*(sizeof(entry)) */
-+#define BUF_FLAGS		(RB_FL_OVERWRITE)	/* no block on full */
-+#define U64STR_SIZE		22			/* 20 digits max */
-+
-+#define VERSION			"1.0.0"
-+#define BANNER			"hwlat_detector: "
-+#define DRVNAME			"hwlat_detector"
-+#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
-+#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
-+#define DEFAULT_LAT_THRESHOLD	10			/* 10us */
-+
-+/* Module metadata */
-+
-+MODULE_LICENSE("GPL");
-+MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
-+MODULE_DESCRIPTION("A simple hardware latency detector");
-+MODULE_VERSION(VERSION);
-+
-+/* Module parameters */
-+
-+static int debug;
-+static int enabled;
-+static int threshold;
-+
-+module_param(debug, int, 0);			/* enable debug */
-+module_param(enabled, int, 0);			/* enable detector */
-+module_param(threshold, int, 0);		/* latency threshold */
-+
-+/* Buffering and sampling */
-+
-+static struct ring_buffer *ring_buffer;		/* sample buffer */
-+static DEFINE_MUTEX(ring_buffer_mutex);		/* lock changes */
-+static unsigned long buf_size = BUF_SIZE_DEFAULT;
-+static struct task_struct *kthread;		/* sampling thread */
-+
-+/* DebugFS filesystem entries */
-+
-+static struct dentry *debug_dir;		/* debugfs directory */
-+static struct dentry *debug_max;		/* maximum TSC delta */
-+static struct dentry *debug_count;		/* total detect count */
-+static struct dentry *debug_sample_width;	/* sample width us */
-+static struct dentry *debug_sample_window;	/* sample window us */
-+static struct dentry *debug_sample;		/* raw samples us */
-+static struct dentry *debug_threshold;		/* threshold us */
-+static struct dentry *debug_enable;		/* enable/disable */
-+
-+/* Individual samples and global state */
-+
-+struct sample;					/* latency sample */
-+struct data;					/* Global state */
-+
-+/* Sampling functions */
-+static int __buffer_add_sample(struct sample *sample);
-+static struct sample *buffer_get_sample(struct sample *sample);
-+static int get_sample(void *unused);
-+
-+/* Threading and state */
-+static int kthread_fn(void *unused);
-+static int start_kthread(void);
-+static int stop_kthread(void);
-+static void __reset_stats(void);
-+static int init_stats(void);
-+
-+/* Debugfs interface */
-+static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
-+				size_t cnt, loff_t *ppos, const u64 *entry);
-+static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
-+				 size_t cnt, loff_t *ppos, u64 *entry);
-+static int debug_sample_fopen(struct inode *inode, struct file *filp);
-+static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
-+				  size_t cnt, loff_t *ppos);
-+static int debug_sample_release(struct inode *inode, struct file *filp);
-+static int debug_enable_fopen(struct inode *inode, struct file *filp);
-+static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
-+				  size_t cnt, loff_t *ppos);
-+static ssize_t debug_enable_fwrite(struct file *file,
-+				   const char __user *user_buffer,
-+				   size_t user_size, loff_t *offset);
-+
-+/* Initialization functions */
-+static int init_debugfs(void);
-+static void free_debugfs(void);
-+static int detector_init(void);
-+static void detector_exit(void);
-+
-+/* Individual latency samples are stored here when detected and packed into
-+ * the ring_buffer circular buffer, where they are overwritten when
-+ * more than buf_size/sizeof(sample) samples are received. */
-+struct sample {
-+	u64		seqnum;		/* unique sequence */
-+	u64		duration;	/* ktime delta */
-+	struct timespec	timestamp;	/* wall time */
-+	unsigned long   lost;
-+};
-+
-+/* keep the global state somewhere. Mostly used under stop_machine. */
-+static struct data {
-+
-+	struct mutex lock;		/* protect changes */
-+
-+	u64	count;			/* total since reset */
-+	u64	max_sample;		/* max hardware latency */
-+	u64	threshold;		/* sample threshold level */
-+
-+	u64	sample_window;		/* total sampling window (on+off) */
-+	u64	sample_width;		/* active sampling portion of window */
-+
-+	atomic_t sample_open;		/* whether the sample file is open */
-+
-+	wait_queue_head_t wq;		/* waitqeue for new sample values */
-+
-+} data;
-+
-+/**
-+ * __buffer_add_sample - add a new latency sample recording to the ring buffer
-+ * @sample: The new latency sample value
-+ *
-+ * This receives a new latency sample and records it in a global ring buffer.
-+ * No additional locking is used in this case - suited for stop_machine use.
-+ */
-+static int __buffer_add_sample(struct sample *sample)
-+{
-+	return ring_buffer_write(ring_buffer,
-+				 sizeof(struct sample), sample);
-+}
-+
-+/**
-+ * buffer_get_sample - remove a hardware latency sample from the ring buffer
-+ * @sample: Pre-allocated storage for the sample
-+ *
-+ * This retrieves a hardware latency sample from the global circular buffer
-+ */
-+static struct sample *buffer_get_sample(struct sample *sample)
-+{
-+	struct ring_buffer_event *e = NULL;
-+	struct sample *s = NULL;
-+	unsigned int cpu = 0;
-+
-+	if (!sample)
-+		return NULL;
-+
-+	mutex_lock(&ring_buffer_mutex);
-+	for_each_online_cpu(cpu) {
-+		e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
-+		if (e)
-+			break;
-+	}
-+
-+	if (e) {
-+		s = ring_buffer_event_data(e);
-+		memcpy(sample, s, sizeof(struct sample));
-+	} else
-+		sample = NULL;
-+	mutex_unlock(&ring_buffer_mutex);
-+
-+	return sample;
-+}
-+
-+/**
-+ * get_sample - sample the CPU TSC and look for likely hardware latencies
-+ * @unused: This is not used but is a part of the stop_machine API
-+ *
-+ * Used to repeatedly capture the CPU TSC (or similar), looking for potential
-+ * hardware-induced latency. Called under stop_machine, with data.lock held.
-+ */
-+static int get_sample(void *unused)
-+{
-+	ktime_t start, t1, t2;
-+	s64 diff, total = 0;
-+	u64 sample = 0;
-+	int ret = 1;
-+
-+	start = ktime_get(); /* start timestamp */
-+
-+	do {
-+
-+		t1 = ktime_get();	/* we'll look for a discontinuity */
-+		t2 = ktime_get();
-+
-+		total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
-+		diff = ktime_to_us(ktime_sub(t2, t1));     /* current diff */
-+
-+		/* This shouldn't happen */
-+		if (diff < 0) {
-+			pr_err(BANNER "time running backwards\n");
-+			goto out;
-+		}
-+
-+		if (diff > sample)
-+			sample = diff; /* only want highest value */
-+
-+	} while (total <= data.sample_width);
-+
-+	/* If we exceed the threshold value, we have found a hardware latency */
-+	if (sample > data.threshold) {
-+		struct sample s;
-+
-+		data.count++;
-+		s.seqnum = data.count;
-+		s.duration = sample;
-+		s.timestamp = CURRENT_TIME;
-+		__buffer_add_sample(&s);
-+
-+		/* Keep a running maximum ever recorded hardware latency */
-+		if (sample > data.max_sample)
-+			data.max_sample = sample;
-+	}
-+
-+	ret = 0;
-+out:
-+	return ret;
-+}
-+
-+/*
-+ * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
-+ * @unused: A required part of the kthread API.
-+ *
-+ * Used to periodically sample the CPU TSC via a call to get_sample. We
-+ * use stop_machine, whith does (intentionally) introduce latency since we
-+ * need to ensure nothing else might be running (and thus pre-empting).
-+ * Obviously this should never be used in production environments.
-+ *
-+ * stop_machine will schedule us typically only on CPU0 which is fine for
-+ * almost every real-world hardware latency situation - but we might later
-+ * generalize this if we find there are any actualy systems with alternate
-+ * SMI delivery or other non CPU0 hardware latencies.
-+ */
-+static int kthread_fn(void *unused)
-+{
-+	int err = 0;
-+	u64 interval = 0;
-+
-+	while (!kthread_should_stop()) {
-+
-+		mutex_lock(&data.lock);
-+
-+		err = stop_machine(get_sample, unused, 0);
-+		if (err) {
-+			/* Houston, we have a problem */
-+			mutex_unlock(&data.lock);
-+			goto err_out;
-+		}
-+
-+		wake_up(&data.wq); /* wake up reader(s) */
-+
-+		interval = data.sample_window - data.sample_width;
-+		do_div(interval, USEC_PER_MSEC); /* modifies interval value */
-+
-+		mutex_unlock(&data.lock);
-+
-+		if (msleep_interruptible(interval))
-+			goto out;
-+	}
-+		goto out;
-+err_out:
-+	pr_err(BANNER "could not call stop_machine, disabling\n");
-+	enabled = 0;
-+out:
-+	return err;
-+
-+}
-+
-+/**
-+ * start_kthread - Kick off the hardware latency sampling/detector kthread
-+ *
-+ * This starts a kernel thread that will sit and sample the CPU timestamp
-+ * counter (TSC or similar) and look for potential hardware latencies.
-+ */
-+static int start_kthread(void)
-+{
-+	kthread = kthread_run(kthread_fn, NULL,
-+					DRVNAME);
-+	if (IS_ERR(kthread)) {
-+		pr_err(BANNER "could not start sampling thread\n");
-+		enabled = 0;
-+		return -ENOMEM;
-+	}
-+
-+	return 0;
-+}
-+
-+/**
-+ * stop_kthread - Inform the hardware latency samping/detector kthread to stop
-+ *
-+ * This kicks the running hardware latency sampling/detector kernel thread and
-+ * tells it to stop sampling now. Use this on unload and at system shutdown.
-+ */
-+static int stop_kthread(void)
-+{
-+	int ret;
-+
-+	ret = kthread_stop(kthread);
-+
-+	return ret;
-+}
-+
-+/**
-+ * __reset_stats - Reset statistics for the hardware latency detector
-+ *
-+ * We use data to store various statistics and global state. We call this
-+ * function in order to reset those when "enable" is toggled on or off, and
-+ * also at initialization. Should be called with data.lock held.
-+ */
-+static void __reset_stats(void)
-+{
-+	data.count = 0;
-+	data.max_sample = 0;
-+	ring_buffer_reset(ring_buffer); /* flush out old sample entries */
-+}
-+
-+/**
-+ * init_stats - Setup global state statistics for the hardware latency detector
-+ *
-+ * We use data to store various statistics and global state. We also use
-+ * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
-+ * induced system latencies. This function initializes these structures and
-+ * allocates the global ring buffer also.
-+ */
-+static int init_stats(void)
-+{
-+	int ret = -ENOMEM;
-+
-+	mutex_init(&data.lock);
-+	init_waitqueue_head(&data.wq);
-+	atomic_set(&data.sample_open, 0);
-+
-+	ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
-+
-+	if (WARN(!ring_buffer, KERN_ERR BANNER
-+			       "failed to allocate ring buffer!\n"))
-+		goto out;
-+
-+	__reset_stats();
-+	data.threshold = DEFAULT_LAT_THRESHOLD;	    /* threshold us */
-+	data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
-+	data.sample_width = DEFAULT_SAMPLE_WIDTH;   /* width us */
-+
-+	ret = 0;
-+
-+out:
-+	return ret;
-+
-+}
-+
-+/*
-+ * simple_data_read - Wrapper read function for global state debugfs entries
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The userspace provided buffer to read value into
-+ * @cnt: The maximum number of bytes to read
-+ * @ppos: The current "file" position
-+ * @entry: The entry to read from
-+ *
-+ * This function provides a generic read implementation for the global state
-+ * "data" structure debugfs filesystem entries. It would be nice to use
-+ * simple_attr_read directly, but we need to make sure that the data.lock
-+ * spinlock is held during the actual read (even though we likely won't ever
-+ * actually race here as the updater runs under a stop_machine context).
-+ */
-+static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
-+				size_t cnt, loff_t *ppos, const u64 *entry)
-+{
-+	char buf[U64STR_SIZE];
-+	u64 val = 0;
-+	int len = 0;
-+
-+	memset(buf, 0, sizeof(buf));
-+
-+	if (!entry)
-+		return -EFAULT;
-+
-+	mutex_lock(&data.lock);
-+	val = *entry;
-+	mutex_unlock(&data.lock);
-+
-+	len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
-+
-+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
-+
-+}
-+
-+/*
-+ * simple_data_write - Wrapper write function for global state debugfs entries
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The userspace provided buffer to write value from
-+ * @cnt: The maximum number of bytes to write
-+ * @ppos: The current "file" position
-+ * @entry: The entry to write to
-+ *
-+ * This function provides a generic write implementation for the global state
-+ * "data" structure debugfs filesystem entries. It would be nice to use
-+ * simple_attr_write directly, but we need to make sure that the data.lock
-+ * spinlock is held during the actual write (even though we likely won't ever
-+ * actually race here as the updater runs under a stop_machine context).
-+ */
-+static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
-+				 size_t cnt, loff_t *ppos, u64 *entry)
-+{
-+	char buf[U64STR_SIZE];
-+	int csize = min(cnt, sizeof(buf));
-+	u64 val = 0;
-+	int err = 0;
-+
-+	memset(buf, '\0', sizeof(buf));
-+	if (copy_from_user(buf, ubuf, csize))
-+		return -EFAULT;
-+
-+	buf[U64STR_SIZE-1] = '\0';			/* just in case */
-+	err = kstrtoull(buf, 10, &val);
-+	if (err)
-+		return -EINVAL;
-+
-+	mutex_lock(&data.lock);
-+	*entry = val;
-+	mutex_unlock(&data.lock);
-+
-+	return csize;
-+}
-+
-+/**
-+ * debug_count_fopen - Open function for "count" debugfs entry
-+ * @inode: The in-kernel inode representation of the debugfs "file"
-+ * @filp: The active open file structure for the debugfs "file"
-+ *
-+ * This function provides an open implementation for the "count" debugfs
-+ * interface to the hardware latency detector.
-+ */
-+static int debug_count_fopen(struct inode *inode, struct file *filp)
-+{
-+	return 0;
-+}
-+
-+/**
-+ * debug_count_fread - Read function for "count" debugfs entry
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The userspace provided buffer to read value into
-+ * @cnt: The maximum number of bytes to read
-+ * @ppos: The current "file" position
-+ *
-+ * This function provides a read implementation for the "count" debugfs
-+ * interface to the hardware latency detector. Can be used to read the
-+ * number of latency readings exceeding the configured threshold since
-+ * the detector was last reset (e.g. by writing a zero into "count").
-+ */
-+static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
-+				     size_t cnt, loff_t *ppos)
-+{
-+	return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
-+}
-+
-+/**
-+ * debug_count_fwrite - Write function for "count" debugfs entry
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The user buffer that contains the value to write
-+ * @cnt: The maximum number of bytes to write to "file"
-+ * @ppos: The current position in the debugfs "file"
-+ *
-+ * This function provides a write implementation for the "count" debugfs
-+ * interface to the hardware latency detector. Can be used to write a
-+ * desired value, especially to zero the total count.
-+ */
-+static ssize_t  debug_count_fwrite(struct file *filp,
-+				       const char __user *ubuf,
-+				       size_t cnt,
-+				       loff_t *ppos)
-+{
-+	return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
-+}
-+
-+/**
-+ * debug_enable_fopen - Dummy open function for "enable" debugfs interface
-+ * @inode: The in-kernel inode representation of the debugfs "file"
-+ * @filp: The active open file structure for the debugfs "file"
-+ *
-+ * This function provides an open implementation for the "enable" debugfs
-+ * interface to the hardware latency detector.
-+ */
-+static int debug_enable_fopen(struct inode *inode, struct file *filp)
-+{
-+	return 0;
-+}
-+
-+/**
-+ * debug_enable_fread - Read function for "enable" debugfs interface
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The userspace provided buffer to read value into
-+ * @cnt: The maximum number of bytes to read
-+ * @ppos: The current "file" position
-+ *
-+ * This function provides a read implementation for the "enable" debugfs
-+ * interface to the hardware latency detector. Can be used to determine
-+ * whether the detector is currently enabled ("0\n" or "1\n" returned).
-+ */
-+static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
-+				      size_t cnt, loff_t *ppos)
-+{
-+	char buf[4];
-+
-+	if ((cnt < sizeof(buf)) || (*ppos))
-+		return 0;
-+
-+	buf[0] = enabled ? '1' : '0';
-+	buf[1] = '\n';
-+	buf[2] = '\0';
-+	if (copy_to_user(ubuf, buf, strlen(buf)))
-+		return -EFAULT;
-+	return *ppos = strlen(buf);
-+}
-+
-+/**
-+ * debug_enable_fwrite - Write function for "enable" debugfs interface
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The user buffer that contains the value to write
-+ * @cnt: The maximum number of bytes to write to "file"
-+ * @ppos: The current position in the debugfs "file"
-+ *
-+ * This function provides a write implementation for the "enable" debugfs
-+ * interface to the hardware latency detector. Can be used to enable or
-+ * disable the detector, which will have the side-effect of possibly
-+ * also resetting the global stats and kicking off the measuring
-+ * kthread (on an enable) or the converse (upon a disable).
-+ */
-+static ssize_t  debug_enable_fwrite(struct file *filp,
-+					const char __user *ubuf,
-+					size_t cnt,
-+					loff_t *ppos)
-+{
-+	char buf[4];
-+	int csize = min(cnt, sizeof(buf));
-+	long val = 0;
-+	int err = 0;
-+
-+	memset(buf, '\0', sizeof(buf));
-+	if (copy_from_user(buf, ubuf, csize))
-+		return -EFAULT;
-+
-+	buf[sizeof(buf)-1] = '\0';			/* just in case */
-+	err = kstrtoul(buf, 10, &val);
-+	if (err)
-+		return -EINVAL;
-+
-+	if (val) {
-+		if (enabled)
-+			goto unlock;
-+		enabled = 1;
-+		__reset_stats();
-+		if (start_kthread())
-+			return -EFAULT;
-+	} else {
-+		if (!enabled)
-+			goto unlock;
-+		enabled = 0;
-+		err = stop_kthread();
-+		if (err) {
-+			pr_err(BANNER "cannot stop kthread\n");
-+			return -EFAULT;
-+		}
-+		wake_up(&data.wq);		/* reader(s) should return */
-+	}
-+unlock:
-+	return csize;
-+}
-+
-+/**
-+ * debug_max_fopen - Open function for "max" debugfs entry
-+ * @inode: The in-kernel inode representation of the debugfs "file"
-+ * @filp: The active open file structure for the debugfs "file"
-+ *
-+ * This function provides an open implementation for the "max" debugfs
-+ * interface to the hardware latency detector.
-+ */
-+static int debug_max_fopen(struct inode *inode, struct file *filp)
-+{
-+	return 0;
-+}
-+
-+/**
-+ * debug_max_fread - Read function for "max" debugfs entry
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The userspace provided buffer to read value into
-+ * @cnt: The maximum number of bytes to read
-+ * @ppos: The current "file" position
-+ *
-+ * This function provides a read implementation for the "max" debugfs
-+ * interface to the hardware latency detector. Can be used to determine
-+ * the maximum latency value observed since it was last reset.
-+ */
-+static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
-+				   size_t cnt, loff_t *ppos)
-+{
-+	return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
-+}
-+
-+/**
-+ * debug_max_fwrite - Write function for "max" debugfs entry
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The user buffer that contains the value to write
-+ * @cnt: The maximum number of bytes to write to "file"
-+ * @ppos: The current position in the debugfs "file"
-+ *
-+ * This function provides a write implementation for the "max" debugfs
-+ * interface to the hardware latency detector. Can be used to reset the
-+ * maximum or set it to some other desired value - if, then, subsequent
-+ * measurements exceed this value, the maximum will be updated.
-+ */
-+static ssize_t  debug_max_fwrite(struct file *filp,
-+				     const char __user *ubuf,
-+				     size_t cnt,
-+				     loff_t *ppos)
-+{
-+	return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
-+}
-+
-+
-+/**
-+ * debug_sample_fopen - An open function for "sample" debugfs interface
-+ * @inode: The in-kernel inode representation of this debugfs "file"
-+ * @filp: The active open file structure for the debugfs "file"
-+ *
-+ * This function handles opening the "sample" file within the hardware
-+ * latency detector debugfs directory interface. This file is used to read
-+ * raw samples from the global ring_buffer and allows the user to see a
-+ * running latency history. Can be opened blocking or non-blocking,
-+ * affecting whether it behaves as a buffer read pipe, or does not.
-+ * Implements simple locking to prevent multiple simultaneous use.
-+ */
-+static int debug_sample_fopen(struct inode *inode, struct file *filp)
-+{
-+	if (!atomic_add_unless(&data.sample_open, 1, 1))
-+		return -EBUSY;
-+	else
-+		return 0;
-+}
-+
-+/**
-+ * debug_sample_fread - A read function for "sample" debugfs interface
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The user buffer that will contain the samples read
-+ * @cnt: The maximum bytes to read from the debugfs "file"
-+ * @ppos: The current position in the debugfs "file"
-+ *
-+ * This function handles reading from the "sample" file within the hardware
-+ * latency detector debugfs directory interface. This file is used to read
-+ * raw samples from the global ring_buffer and allows the user to see a
-+ * running latency history. By default this will block pending a new
-+ * value written into the sample buffer, unless there are already a
-+ * number of value(s) waiting in the buffer, or the sample file was
-+ * previously opened in a non-blocking mode of operation.
-+ */
-+static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
-+					size_t cnt, loff_t *ppos)
-+{
-+	int len = 0;
-+	char buf[64];
-+	struct sample *sample = NULL;
-+
-+	if (!enabled)
-+		return 0;
-+
-+	sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
-+	if (!sample)
-+		return -ENOMEM;
-+
-+	while (!buffer_get_sample(sample)) {
-+
-+		DEFINE_WAIT(wait);
-+
-+		if (filp->f_flags & O_NONBLOCK) {
-+			len = -EAGAIN;
-+			goto out;
-+		}
-+
-+		prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
-+		schedule();
-+		finish_wait(&data.wq, &wait);
-+
-+		if (signal_pending(current)) {
-+			len = -EINTR;
-+			goto out;
-+		}
-+
-+		if (!enabled) {			/* enable was toggled */
-+			len = 0;
-+			goto out;
-+		}
-+	}
-+
-+	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
-+		      sample->timestamp.tv_sec,
-+		      sample->timestamp.tv_nsec,
-+		      sample->duration);
-+
-+
-+	/* handling partial reads is more trouble than it's worth */
-+	if (len > cnt)
-+		goto out;
-+
-+	if (copy_to_user(ubuf, buf, len))
-+		len = -EFAULT;
-+
-+out:
-+	kfree(sample);
-+	return len;
-+}
-+
-+/**
-+ * debug_sample_release - Release function for "sample" debugfs interface
-+ * @inode: The in-kernel inode represenation of the debugfs "file"
-+ * @filp: The active open file structure for the debugfs "file"
-+ *
-+ * This function completes the close of the debugfs interface "sample" file.
-+ * Frees the sample_open "lock" so that other users may open the interface.
-+ */
-+static int debug_sample_release(struct inode *inode, struct file *filp)
-+{
-+	atomic_dec(&data.sample_open);
-+
-+	return 0;
-+}
-+
-+/**
-+ * debug_threshold_fopen - Open function for "threshold" debugfs entry
-+ * @inode: The in-kernel inode representation of the debugfs "file"
-+ * @filp: The active open file structure for the debugfs "file"
-+ *
-+ * This function provides an open implementation for the "threshold" debugfs
-+ * interface to the hardware latency detector.
-+ */
-+static int debug_threshold_fopen(struct inode *inode, struct file *filp)
-+{
-+	return 0;
-+}
-+
-+/**
-+ * debug_threshold_fread - Read function for "threshold" debugfs entry
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The userspace provided buffer to read value into
-+ * @cnt: The maximum number of bytes to read
-+ * @ppos: The current "file" position
-+ *
-+ * This function provides a read implementation for the "threshold" debugfs
-+ * interface to the hardware latency detector. It can be used to determine
-+ * the current threshold level at which a latency will be recorded in the
-+ * global ring buffer, typically on the order of 10us.
-+ */
-+static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
-+					 size_t cnt, loff_t *ppos)
-+{
-+	return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
-+}
-+
-+/**
-+ * debug_threshold_fwrite - Write function for "threshold" debugfs entry
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The user buffer that contains the value to write
-+ * @cnt: The maximum number of bytes to write to "file"
-+ * @ppos: The current position in the debugfs "file"
-+ *
-+ * This function provides a write implementation for the "threshold" debugfs
-+ * interface to the hardware latency detector. It can be used to configure
-+ * the threshold level at which any subsequently detected latencies will
-+ * be recorded into the global ring buffer.
-+ */
-+static ssize_t  debug_threshold_fwrite(struct file *filp,
-+					const char __user *ubuf,
-+					size_t cnt,
-+					loff_t *ppos)
-+{
-+	int ret;
-+
-+	ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
-+
-+	if (enabled)
-+		wake_up_process(kthread);
-+
-+	return ret;
-+}
-+
-+/**
-+ * debug_width_fopen - Open function for "width" debugfs entry
-+ * @inode: The in-kernel inode representation of the debugfs "file"
-+ * @filp: The active open file structure for the debugfs "file"
-+ *
-+ * This function provides an open implementation for the "width" debugfs
-+ * interface to the hardware latency detector.
-+ */
-+static int debug_width_fopen(struct inode *inode, struct file *filp)
-+{
-+	return 0;
-+}
-+
-+/**
-+ * debug_width_fread - Read function for "width" debugfs entry
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The userspace provided buffer to read value into
-+ * @cnt: The maximum number of bytes to read
-+ * @ppos: The current "file" position
-+ *
-+ * This function provides a read implementation for the "width" debugfs
-+ * interface to the hardware latency detector. It can be used to determine
-+ * for how many us of the total window us we will actively sample for any
-+ * hardware-induced latecy periods. Obviously, it is not possible to
-+ * sample constantly and have the system respond to a sample reader, or,
-+ * worse, without having the system appear to have gone out to lunch.
-+ */
-+static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
-+				     size_t cnt, loff_t *ppos)
-+{
-+	return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
-+}
-+
-+/**
-+ * debug_width_fwrite - Write function for "width" debugfs entry
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The user buffer that contains the value to write
-+ * @cnt: The maximum number of bytes to write to "file"
-+ * @ppos: The current position in the debugfs "file"
-+ *
-+ * This function provides a write implementation for the "width" debugfs
-+ * interface to the hardware latency detector. It can be used to configure
-+ * for how many us of the total window us we will actively sample for any
-+ * hardware-induced latency periods. Obviously, it is not possible to
-+ * sample constantly and have the system respond to a sample reader, or,
-+ * worse, without having the system appear to have gone out to lunch. It
-+ * is enforced that width is less that the total window size.
-+ */
-+static ssize_t  debug_width_fwrite(struct file *filp,
-+				       const char __user *ubuf,
-+				       size_t cnt,
-+				       loff_t *ppos)
-+{
-+	char buf[U64STR_SIZE];
-+	int csize = min(cnt, sizeof(buf));
-+	u64 val = 0;
-+	int err = 0;
-+
-+	memset(buf, '\0', sizeof(buf));
-+	if (copy_from_user(buf, ubuf, csize))
-+		return -EFAULT;
-+
-+	buf[U64STR_SIZE-1] = '\0';			/* just in case */
-+	err = kstrtoull(buf, 10, &val);
-+	if (err)
-+		return -EINVAL;
-+
-+	mutex_lock(&data.lock);
-+	if (val < data.sample_window)
-+		data.sample_width = val;
-+	else {
-+		mutex_unlock(&data.lock);
-+		return -EINVAL;
-+	}
-+	mutex_unlock(&data.lock);
-+
-+	if (enabled)
-+		wake_up_process(kthread);
-+
-+	return csize;
-+}
-+
-+/**
-+ * debug_window_fopen - Open function for "window" debugfs entry
-+ * @inode: The in-kernel inode representation of the debugfs "file"
-+ * @filp: The active open file structure for the debugfs "file"
-+ *
-+ * This function provides an open implementation for the "window" debugfs
-+ * interface to the hardware latency detector. The window is the total time
-+ * in us that will be considered one sample period. Conceptually, windows
-+ * occur back-to-back and contain a sample width period during which
-+ * actual sampling occurs.
-+ */
-+static int debug_window_fopen(struct inode *inode, struct file *filp)
-+{
-+	return 0;
-+}
-+
-+/**
-+ * debug_window_fread - Read function for "window" debugfs entry
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The userspace provided buffer to read value into
-+ * @cnt: The maximum number of bytes to read
-+ * @ppos: The current "file" position
-+ *
-+ * This function provides a read implementation for the "window" debugfs
-+ * interface to the hardware latency detector. The window is the total time
-+ * in us that will be considered one sample period. Conceptually, windows
-+ * occur back-to-back and contain a sample width period during which
-+ * actual sampling occurs. Can be used to read the total window size.
-+ */
-+static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
-+				      size_t cnt, loff_t *ppos)
-+{
-+	return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
-+}
-+
-+/**
-+ * debug_window_fwrite - Write function for "window" debugfs entry
-+ * @filp: The active open file structure for the debugfs "file"
-+ * @ubuf: The user buffer that contains the value to write
-+ * @cnt: The maximum number of bytes to write to "file"
-+ * @ppos: The current position in the debugfs "file"
-+ *
-+ * This function provides a write implementation for the "window" debufds
-+ * interface to the hardware latency detetector. The window is the total time
-+ * in us that will be considered one sample period. Conceptually, windows
-+ * occur back-to-back and contain a sample width period during which
-+ * actual sampling occurs. Can be used to write a new total window size. It
-+ * is enfoced that any value written must be greater than the sample width
-+ * size, or an error results.
-+ */
-+static ssize_t  debug_window_fwrite(struct file *filp,
-+					const char __user *ubuf,
-+					size_t cnt,
-+					loff_t *ppos)
-+{
-+	char buf[U64STR_SIZE];
-+	int csize = min(cnt, sizeof(buf));
-+	u64 val = 0;
-+	int err = 0;
-+
-+	memset(buf, '\0', sizeof(buf));
-+	if (copy_from_user(buf, ubuf, csize))
-+		return -EFAULT;
-+
-+	buf[U64STR_SIZE-1] = '\0';			/* just in case */
-+	err = kstrtoull(buf, 10, &val);
-+	if (err)
-+		return -EINVAL;
-+
-+	mutex_lock(&data.lock);
-+	if (data.sample_width < val)
-+		data.sample_window = val;
-+	else {
-+		mutex_unlock(&data.lock);
-+		return -EINVAL;
-+	}
-+	mutex_unlock(&data.lock);
-+
-+	return csize;
-+}
-+
-+/*
-+ * Function pointers for the "count" debugfs file operations
-+ */
-+static const struct file_operations count_fops = {
-+	.open		= debug_count_fopen,
-+	.read		= debug_count_fread,
-+	.write		= debug_count_fwrite,
-+	.owner		= THIS_MODULE,
-+};
-+
-+/*
-+ * Function pointers for the "enable" debugfs file operations
-+ */
-+static const struct file_operations enable_fops = {
-+	.open		= debug_enable_fopen,
-+	.read		= debug_enable_fread,
-+	.write		= debug_enable_fwrite,
-+	.owner		= THIS_MODULE,
-+};
-+
-+/*
-+ * Function pointers for the "max" debugfs file operations
-+ */
-+static const struct file_operations max_fops = {
-+	.open		= debug_max_fopen,
-+	.read		= debug_max_fread,
-+	.write		= debug_max_fwrite,
-+	.owner		= THIS_MODULE,
-+};
-+
-+/*
-+ * Function pointers for the "sample" debugfs file operations
-+ */
-+static const struct file_operations sample_fops = {
-+	.open		= debug_sample_fopen,
-+	.read		= debug_sample_fread,
-+	.release	= debug_sample_release,
-+	.owner		= THIS_MODULE,
-+};
-+
-+/*
-+ * Function pointers for the "threshold" debugfs file operations
-+ */
-+static const struct file_operations threshold_fops = {
-+	.open		= debug_threshold_fopen,
-+	.read		= debug_threshold_fread,
-+	.write		= debug_threshold_fwrite,
-+	.owner		= THIS_MODULE,
-+};
-+
-+/*
-+ * Function pointers for the "width" debugfs file operations
-+ */
-+static const struct file_operations width_fops = {
-+	.open		= debug_width_fopen,
-+	.read		= debug_width_fread,
-+	.write		= debug_width_fwrite,
-+	.owner		= THIS_MODULE,
-+};
-+
-+/*
-+ * Function pointers for the "window" debugfs file operations
-+ */
-+static const struct file_operations window_fops = {
-+	.open		= debug_window_fopen,
-+	.read		= debug_window_fread,
-+	.write		= debug_window_fwrite,
-+	.owner		= THIS_MODULE,
-+};
-+
-+/**
-+ * init_debugfs - A function to initialize the debugfs interface files
-+ *
-+ * This function creates entries in debugfs for "hwlat_detector", including
-+ * files to read values from the detector, current samples, and the
-+ * maximum sample that has been captured since the hardware latency
-+ * dectector was started.
-+ */
-+static int init_debugfs(void)
-+{
-+	int ret = -ENOMEM;
-+
-+	debug_dir = debugfs_create_dir(DRVNAME, NULL);
-+	if (!debug_dir)
-+		goto err_debug_dir;
-+
-+	debug_sample = debugfs_create_file("sample", 0444,
-+					       debug_dir, NULL,
-+					       &sample_fops);
-+	if (!debug_sample)
-+		goto err_sample;
-+
-+	debug_count = debugfs_create_file("count", 0444,
-+					      debug_dir, NULL,
-+					      &count_fops);
-+	if (!debug_count)
-+		goto err_count;
-+
-+	debug_max = debugfs_create_file("max", 0444,
-+					    debug_dir, NULL,
-+					    &max_fops);
-+	if (!debug_max)
-+		goto err_max;
-+
-+	debug_sample_window = debugfs_create_file("window", 0644,
-+						      debug_dir, NULL,
-+						      &window_fops);
-+	if (!debug_sample_window)
-+		goto err_window;
-+
-+	debug_sample_width = debugfs_create_file("width", 0644,
-+						     debug_dir, NULL,
-+						     &width_fops);
-+	if (!debug_sample_width)
-+		goto err_width;
-+
-+	debug_threshold = debugfs_create_file("threshold", 0644,
-+						  debug_dir, NULL,
-+						  &threshold_fops);
-+	if (!debug_threshold)
-+		goto err_threshold;
-+
-+	debug_enable = debugfs_create_file("enable", 0644,
-+					       debug_dir, &enabled,
-+					       &enable_fops);
-+	if (!debug_enable)
-+		goto err_enable;
-+
-+	else {
-+		ret = 0;
-+		goto out;
-+	}
-+
-+err_enable:
-+	debugfs_remove(debug_threshold);
-+err_threshold:
-+	debugfs_remove(debug_sample_width);
-+err_width:
-+	debugfs_remove(debug_sample_window);
-+err_window:
-+	debugfs_remove(debug_max);
-+err_max:
-+	debugfs_remove(debug_count);
-+err_count:
-+	debugfs_remove(debug_sample);
-+err_sample:
-+	debugfs_remove(debug_dir);
-+err_debug_dir:
-+out:
-+	return ret;
-+}
-+
-+/**
-+ * free_debugfs - A function to cleanup the debugfs file interface
-+ */
-+static void free_debugfs(void)
-+{
-+	/* could also use a debugfs_remove_recursive */
-+	debugfs_remove(debug_enable);
-+	debugfs_remove(debug_threshold);
-+	debugfs_remove(debug_sample_width);
-+	debugfs_remove(debug_sample_window);
-+	debugfs_remove(debug_max);
-+	debugfs_remove(debug_count);
-+	debugfs_remove(debug_sample);
-+	debugfs_remove(debug_dir);
-+}
-+
-+/**
-+ * detector_init - Standard module initialization code
-+ */
-+static int detector_init(void)
-+{
-+	int ret = -ENOMEM;
-+
-+	pr_info(BANNER "version %s\n", VERSION);
-+
-+	ret = init_stats();
-+	if (ret)
-+		goto out;
-+
-+	ret = init_debugfs();
-+	if (ret)
-+		goto err_stats;
-+
-+	if (enabled)
-+		ret = start_kthread();
-+
-+	goto out;
-+
-+err_stats:
-+	ring_buffer_free(ring_buffer);
-+out:
-+	return ret;
-+
-+}
-+
-+/**
-+ * detector_exit - Standard module cleanup code
-+ */
-+static void detector_exit(void)
-+{
-+	int err;
-+
-+	if (enabled) {
-+		enabled = 0;
-+		err = stop_kthread();
-+		if (err)
-+			pr_err(BANNER "cannot stop kthread\n");
-+	}
-+
-+	free_debugfs();
-+	ring_buffer_free(ring_buffer);	/* free up the ring buffer */
-+
-+}
-+
-+module_init(detector_init);
-+module_exit(detector_exit);
diff --git a/patches/series b/patches/series
index 4979396..3fc42c6 100644
--- a/patches/series
+++ b/patches/series
@@ -153,13 +153,6 @@
 trace-latency-hist-Consider-new-argument-when-probin.patch
 trace_Use_rcuidle_version_for_preemptoff_hist_trace_point.patch
 
-# HW LATENCY DETECTOR - this really wants a rewrite
-hwlatdetect.patch
-hwlat-detector-Update-hwlat_detector-to-add-outer-lo.patch
-hwlat-detector-Use-trace_clock_local-if-available.patch
-hwlat-detector-Use-thread-instead-of-stop-machine.patch
-hwlat-detector-Don-t-ignore-threshold-module-paramet.patch
-
 ##################################################
 # REAL RT STUFF starts here
 ##################################################