| /* |
| * hwlat_detector.c - A simple Hardware Latency detector. |
| * |
| * Use this module to detect large system latencies induced by the behavior of |
| * certain underlying system hardware or firmware, independent of Linux itself. |
| * The code was developed originally to detect the presence of SMIs on Intel |
| * and AMD systems, although there is no dependency upon x86 herein. |
| * |
| * The classical example usage of this module is in detecting the presence of |
| * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a |
| * somewhat special form of hardware interrupt spawned from earlier CPU debug |
| * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge |
| * LPC (or other device) to generate a special interrupt under certain |
| * circumstances, for example, upon expiration of a special SMI timer device, |
| * due to certain external thermal readings, on certain I/O address accesses, |
| * and other situations. An SMI hits a special CPU pin, triggers a special |
| * SMI mode (complete with special memory map), and the OS is unaware. |
| * |
| * Although certain hardware-induced latencies are necessary (for example, |
| * a modern system often requires an SMI handler for correct thermal control |
| * and remote management) they can wreak havoc upon any OS-level performance |
| * guarantees toward low-latency, especially when the OS is not even made |
| * aware of the presence of these interrupts. For this reason, we need a |
| * somewhat brute force mechanism to detect these interrupts. In this case, |
| * we do it by hogging all of the CPU(s) for configurable timer intervals, |
| * sampling the built-in CPU timer, looking for discontiguous readings. |
| * |
| * WARNING: This implementation necessarily introduces latencies. Therefore, |
| * you should NEVER use this module in a production environment |
| * requiring any kind of low-latency performance guarantee(s). |
| * |
| * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> |
| * |
| * Includes useful feedback from Clark Williams <clark@redhat.com> |
| * |
| * This file is licensed under the terms of the GNU General Public |
| * License version 2. This program is licensed "as is" without any |
| * warranty of any kind, whether express or implied. |
| */ |
| |
| #include <linux/module.h> |
| #include <linux/init.h> |
| #include <linux/ring_buffer.h> |
| #include <linux/time.h> |
| #include <linux/hrtimer.h> |
| #include <linux/kthread.h> |
| #include <linux/debugfs.h> |
| #include <linux/seq_file.h> |
| #include <linux/uaccess.h> |
| #include <linux/version.h> |
| #include <linux/delay.h> |
| #include <linux/slab.h> |
| #include <linux/trace_clock.h> |
| |
/* Driver identity strings */
#define DRVNAME			"hwlat_detector"
#define BANNER			"hwlat_detector: "	/* prefix for log messages */
#define VERSION			"1.0.0"

/* Sample ring buffer configuration */
#define BUF_SIZE_DEFAULT	262144UL		/* 8K*(sizeof(entry)) */
#define BUF_FLAGS		(RB_FL_OVERWRITE)	/* no block on full */

/* Size of the text buffers used to parse/format a u64 (20 digits max) */
#define U64STR_SIZE		22

/* Default timings, expressed in microseconds */
#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
#define DEFAULT_LAT_THRESHOLD	10			/* 10us */
| |
| /* Module metadata */ |
| |
| MODULE_LICENSE("GPL"); |
| MODULE_AUTHOR("Jon Masters <jcm@redhat.com>"); |
| MODULE_DESCRIPTION("A simple hardware latency detector"); |
| MODULE_VERSION(VERSION); |
| |
| /* Module parameters */ |
| |
| static int debug; |
| static int enabled; |
| static int threshold; |
| |
| module_param(debug, int, 0); /* enable debug */ |
| module_param(enabled, int, 0); /* enable detector */ |
| module_param(threshold, int, 0); /* latency threshold */ |
| |
| /* Buffering and sampling */ |
| |
| static struct ring_buffer *ring_buffer; /* sample buffer */ |
| static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */ |
| static unsigned long buf_size = BUF_SIZE_DEFAULT; |
| static struct task_struct *kthread; /* sampling thread */ |
| |
| /* DebugFS filesystem entries */ |
| |
| static struct dentry *debug_dir; /* debugfs directory */ |
| static struct dentry *debug_max; /* maximum TSC delta */ |
| static struct dentry *debug_count; /* total detect count */ |
| static struct dentry *debug_sample_width; /* sample width us */ |
| static struct dentry *debug_sample_window; /* sample window us */ |
| static struct dentry *debug_sample; /* raw samples us */ |
| static struct dentry *debug_threshold; /* threshold us */ |
| static struct dentry *debug_enable; /* enable/disable */ |
| |
| /* Individual samples and global state */ |
| |
| struct sample; /* latency sample */ |
| struct data; /* Global state */ |
| |
| /* Sampling functions */ |
| static int __buffer_add_sample(struct sample *sample); |
| static struct sample *buffer_get_sample(struct sample *sample); |
| |
| /* Threading and state */ |
| static int kthread_fn(void *unused); |
| static int start_kthread(void); |
| static int stop_kthread(void); |
| static void __reset_stats(void); |
| static int init_stats(void); |
| |
| /* Debugfs interface */ |
| static ssize_t simple_data_read(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos, const u64 *entry); |
| static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, |
| size_t cnt, loff_t *ppos, u64 *entry); |
| static int debug_sample_fopen(struct inode *inode, struct file *filp); |
| static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos); |
| static int debug_sample_release(struct inode *inode, struct file *filp); |
| static int debug_enable_fopen(struct inode *inode, struct file *filp); |
| static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos); |
| static ssize_t debug_enable_fwrite(struct file *file, |
| const char __user *user_buffer, |
| size_t user_size, loff_t *offset); |
| |
| /* Initialization functions */ |
| static int init_debugfs(void); |
| static void free_debugfs(void); |
| static int detector_init(void); |
| static void detector_exit(void); |
| |
| /* Individual latency samples are stored here when detected and packed into |
| * the ring_buffer circular buffer, where they are overwritten when |
| * more than buf_size/sizeof(sample) samples are received. */ |
| struct sample { |
| u64 seqnum; /* unique sequence */ |
| u64 duration; /* ktime delta */ |
| u64 outer_duration; /* ktime delta (outer loop) */ |
| struct timespec timestamp; /* wall time */ |
| unsigned long lost; |
| }; |
| |
| /* keep the global state somewhere. */ |
| static struct data { |
| |
| struct mutex lock; /* protect changes */ |
| |
| u64 count; /* total since reset */ |
| u64 max_sample; /* max hardware latency */ |
| u64 threshold; /* sample threshold level */ |
| |
| u64 sample_window; /* total sampling window (on+off) */ |
| u64 sample_width; /* active sampling portion of window */ |
| |
| atomic_t sample_open; /* whether the sample file is open */ |
| |
| wait_queue_head_t wq; /* waitqeue for new sample values */ |
| |
| } data; |
| |
| /** |
| * __buffer_add_sample - add a new latency sample recording to the ring buffer |
| * @sample: The new latency sample value |
| * |
| * This receives a new latency sample and records it in a global ring buffer. |
| * No additional locking is used in this case. |
| */ |
| static int __buffer_add_sample(struct sample *sample) |
| { |
| return ring_buffer_write(ring_buffer, |
| sizeof(struct sample), sample); |
| } |
| |
| /** |
| * buffer_get_sample - remove a hardware latency sample from the ring buffer |
| * @sample: Pre-allocated storage for the sample |
| * |
| * This retrieves a hardware latency sample from the global circular buffer |
| */ |
| static struct sample *buffer_get_sample(struct sample *sample) |
| { |
| struct ring_buffer_event *e = NULL; |
| struct sample *s = NULL; |
| unsigned int cpu = 0; |
| |
| if (!sample) |
| return NULL; |
| |
| mutex_lock(&ring_buffer_mutex); |
| for_each_online_cpu(cpu) { |
| e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost); |
| if (e) |
| break; |
| } |
| |
| if (e) { |
| s = ring_buffer_event_data(e); |
| memcpy(sample, s, sizeof(struct sample)); |
| } else |
| sample = NULL; |
| mutex_unlock(&ring_buffer_mutex); |
| |
| return sample; |
| } |
| |
/*
 * Time source abstraction used by get_sample().
 *
 * Without CONFIG_TRACING the ktime API is used; with it, timestamps are
 * plain u64 values from trace_clock_local() (presumably nanoseconds, given
 * the division by 1000 in time_to_us() - confirm against the trace clock).
 *
 * Every expansion is fully parenthesized so that the macros can be used
 * safely inside larger expressions.
 */
#ifndef CONFIG_TRACING
#define time_type	ktime_t
#define time_get()	ktime_get()
#define time_to_us(x)	ktime_to_us(x)
#define time_sub(a, b)	ktime_sub(a, b)
#define init_time(a, b)	((a).tv64 = (b))
#define time_u64(a)	((a).tv64)
#else
#define time_type	u64
#define time_get()	trace_clock_local()
#define time_to_us(x)	((x) / 1000)
#define time_sub(a, b)	((a) - (b))
#define init_time(a, b)	((a) = (b))
#define time_u64(a)	(a)
#endif
| /** |
| * get_sample - sample the CPU TSC and look for likely hardware latencies |
| * |
| * Used to repeatedly capture the CPU TSC (or similar), looking for potential |
| * hardware-induced latency. Called with interrupts disabled and with data.lock held. |
| */ |
| static int get_sample(void) |
| { |
| time_type start, t1, t2, last_t2; |
| s64 diff, total = 0; |
| u64 sample = 0; |
| u64 outer_sample = 0; |
| int ret = -1; |
| |
| init_time(last_t2, 0); |
| start = time_get(); /* start timestamp */ |
| |
| do { |
| |
| t1 = time_get(); /* we'll look for a discontinuity */ |
| t2 = time_get(); |
| |
| if (time_u64(last_t2)) { |
| /* Check the delta from the outer loop (t2 to next t1) */ |
| diff = time_to_us(time_sub(t1, last_t2)); |
| /* This shouldn't happen */ |
| if (diff < 0) { |
| printk(KERN_ERR BANNER "time running backwards\n"); |
| goto out; |
| } |
| if (diff > outer_sample) |
| outer_sample = diff; |
| } |
| last_t2 = t2; |
| |
| total = time_to_us(time_sub(t2, start)); /* sample width */ |
| |
| /* This checks the inner loop (t1 to t2) */ |
| diff = time_to_us(time_sub(t2, t1)); /* current diff */ |
| |
| /* This shouldn't happen */ |
| if (diff < 0) { |
| printk(KERN_ERR BANNER "time running backwards\n"); |
| goto out; |
| } |
| |
| if (diff > sample) |
| sample = diff; /* only want highest value */ |
| |
| } while (total <= data.sample_width); |
| |
| ret = 0; |
| |
| /* If we exceed the threshold value, we have found a hardware latency */ |
| if (sample > data.threshold || outer_sample > data.threshold) { |
| struct sample s; |
| |
| ret = 1; |
| |
| data.count++; |
| s.seqnum = data.count; |
| s.duration = sample; |
| s.outer_duration = outer_sample; |
| s.timestamp = CURRENT_TIME; |
| __buffer_add_sample(&s); |
| |
| /* Keep a running maximum ever recorded hardware latency */ |
| if (sample > data.max_sample) |
| data.max_sample = sample; |
| } |
| |
| out: |
| return ret; |
| } |
| |
| /* |
| * kthread_fn - The CPU time sampling/hardware latency detection kernel thread |
| * @unused: A required part of the kthread API. |
| * |
| * Used to periodically sample the CPU TSC via a call to get_sample. We |
| * disable interrupts, which does (intentionally) introduce latency since we |
| * need to ensure nothing else might be running (and thus pre-empting). |
| * Obviously this should never be used in production environments. |
| * |
| * Currently this runs on which ever CPU it was scheduled on, but most |
| * real-worald hardware latency situations occur across several CPUs, |
| * but we might later generalize this if we find there are any actualy |
| * systems with alternate SMI delivery or other hardware latencies. |
| */ |
| static int kthread_fn(void *unused) |
| { |
| int ret; |
| u64 interval; |
| |
| while (!kthread_should_stop()) { |
| |
| mutex_lock(&data.lock); |
| |
| local_irq_disable(); |
| ret = get_sample(); |
| local_irq_enable(); |
| |
| if (ret > 0) |
| wake_up(&data.wq); /* wake up reader(s) */ |
| |
| interval = data.sample_window - data.sample_width; |
| do_div(interval, USEC_PER_MSEC); /* modifies interval value */ |
| |
| mutex_unlock(&data.lock); |
| |
| if (msleep_interruptible(interval)) |
| break; |
| } |
| |
| return 0; |
| } |
| |
| /** |
| * start_kthread - Kick off the hardware latency sampling/detector kthread |
| * |
| * This starts a kernel thread that will sit and sample the CPU timestamp |
| * counter (TSC or similar) and look for potential hardware latencies. |
| */ |
| static int start_kthread(void) |
| { |
| kthread = kthread_run(kthread_fn, NULL, |
| DRVNAME); |
| if (IS_ERR(kthread)) { |
| printk(KERN_ERR BANNER "could not start sampling thread\n"); |
| enabled = 0; |
| return -ENOMEM; |
| } |
| |
| return 0; |
| } |
| |
| /** |
| * stop_kthread - Inform the hardware latency samping/detector kthread to stop |
| * |
| * This kicks the running hardware latency sampling/detector kernel thread and |
| * tells it to stop sampling now. Use this on unload and at system shutdown. |
| */ |
| static int stop_kthread(void) |
| { |
| int ret; |
| |
| ret = kthread_stop(kthread); |
| |
| return ret; |
| } |
| |
| /** |
| * __reset_stats - Reset statistics for the hardware latency detector |
| * |
| * We use data to store various statistics and global state. We call this |
| * function in order to reset those when "enable" is toggled on or off, and |
| * also at initialization. Should be called with data.lock held. |
| */ |
| static void __reset_stats(void) |
| { |
| data.count = 0; |
| data.max_sample = 0; |
| ring_buffer_reset(ring_buffer); /* flush out old sample entries */ |
| } |
| |
| /** |
| * init_stats - Setup global state statistics for the hardware latency detector |
| * |
| * We use data to store various statistics and global state. We also use |
| * a global ring buffer (ring_buffer) to keep raw samples of detected hardware |
| * induced system latencies. This function initializes these structures and |
| * allocates the global ring buffer also. |
| */ |
| static int init_stats(void) |
| { |
| int ret = -ENOMEM; |
| |
| mutex_init(&data.lock); |
| init_waitqueue_head(&data.wq); |
| atomic_set(&data.sample_open, 0); |
| |
| ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); |
| |
| if (WARN(!ring_buffer, KERN_ERR BANNER |
| "failed to allocate ring buffer!\n")) |
| goto out; |
| |
| __reset_stats(); |
| data.threshold = DEFAULT_LAT_THRESHOLD; /* threshold us */ |
| data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ |
| data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ |
| |
| ret = 0; |
| |
| out: |
| return ret; |
| |
| } |
| |
| /* |
| * simple_data_read - Wrapper read function for global state debugfs entries |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The userspace provided buffer to read value into |
| * @cnt: The maximum number of bytes to read |
| * @ppos: The current "file" position |
| * @entry: The entry to read from |
| * |
| * This function provides a generic read implementation for the global state |
| * "data" structure debugfs filesystem entries. It would be nice to use |
| * simple_attr_read directly, but we need to make sure that the data.lock |
| * is held during the actual read. |
| */ |
| static ssize_t simple_data_read(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos, const u64 *entry) |
| { |
| char buf[U64STR_SIZE]; |
| u64 val = 0; |
| int len = 0; |
| |
| memset(buf, 0, sizeof(buf)); |
| |
| if (!entry) |
| return -EFAULT; |
| |
| mutex_lock(&data.lock); |
| val = *entry; |
| mutex_unlock(&data.lock); |
| |
| len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); |
| |
| return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); |
| |
| } |
| |
| /* |
| * simple_data_write - Wrapper write function for global state debugfs entries |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The userspace provided buffer to write value from |
| * @cnt: The maximum number of bytes to write |
| * @ppos: The current "file" position |
| * @entry: The entry to write to |
| * |
| * This function provides a generic write implementation for the global state |
| * "data" structure debugfs filesystem entries. It would be nice to use |
| * simple_attr_write directly, but we need to make sure that the data.lock |
| * is held during the actual write. |
| */ |
| static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, |
| size_t cnt, loff_t *ppos, u64 *entry) |
| { |
| char buf[U64STR_SIZE]; |
| int csize = min(cnt, sizeof(buf)); |
| u64 val = 0; |
| int err = 0; |
| |
| memset(buf, '\0', sizeof(buf)); |
| if (copy_from_user(buf, ubuf, csize)) |
| return -EFAULT; |
| |
| buf[U64STR_SIZE-1] = '\0'; /* just in case */ |
| err = strict_strtoull(buf, 10, &val); |
| if (err) |
| return -EINVAL; |
| |
| mutex_lock(&data.lock); |
| *entry = val; |
| mutex_unlock(&data.lock); |
| |
| return csize; |
| } |
| |
/**
 * debug_count_fopen - Open function for "count" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Open handler for the "count" debugfs interface to the hardware latency
 * detector. Always succeeds and returns 0.
 */
static int debug_count_fopen(struct inode *inode, struct file *filp)
{
	/* nothing to set up for this entry */
	return 0;
}
| |
| /** |
| * debug_count_fread - Read function for "count" debugfs entry |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The userspace provided buffer to read value into |
| * @cnt: The maximum number of bytes to read |
| * @ppos: The current "file" position |
| * |
| * This function provides a read implementation for the "count" debugfs |
| * interface to the hardware latency detector. Can be used to read the |
| * number of latency readings exceeding the configured threshold since |
| * the detector was last reset (e.g. by writing a zero into "count"). |
| */ |
| static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos) |
| { |
| return simple_data_read(filp, ubuf, cnt, ppos, &data.count); |
| } |
| |
| /** |
| * debug_count_fwrite - Write function for "count" debugfs entry |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The user buffer that contains the value to write |
| * @cnt: The maximum number of bytes to write to "file" |
| * @ppos: The current position in the debugfs "file" |
| * |
| * This function provides a write implementation for the "count" debugfs |
| * interface to the hardware latency detector. Can be used to write a |
| * desired value, especially to zero the total count. |
| */ |
| static ssize_t debug_count_fwrite(struct file *filp, |
| const char __user *ubuf, |
| size_t cnt, |
| loff_t *ppos) |
| { |
| return simple_data_write(filp, ubuf, cnt, ppos, &data.count); |
| } |
| |
/**
 * debug_enable_fopen - Dummy open function for "enable" debugfs interface
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Open handler for the "enable" debugfs interface to the hardware latency
 * detector. Always succeeds and returns 0.
 */
static int debug_enable_fopen(struct inode *inode, struct file *filp)
{
	/* nothing to set up for this entry */
	return 0;
}
| |
| /** |
| * debug_enable_fread - Read function for "enable" debugfs interface |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The userspace provided buffer to read value into |
| * @cnt: The maximum number of bytes to read |
| * @ppos: The current "file" position |
| * |
| * This function provides a read implementation for the "enable" debugfs |
| * interface to the hardware latency detector. Can be used to determine |
| * whether the detector is currently enabled ("0\n" or "1\n" returned). |
| */ |
| static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos) |
| { |
| char buf[4]; |
| |
| if ((cnt < sizeof(buf)) || (*ppos)) |
| return 0; |
| |
| buf[0] = enabled ? '1' : '0'; |
| buf[1] = '\n'; |
| buf[2] = '\0'; |
| if (copy_to_user(ubuf, buf, strlen(buf))) |
| return -EFAULT; |
| return *ppos = strlen(buf); |
| } |
| |
| /** |
| * debug_enable_fwrite - Write function for "enable" debugfs interface |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The user buffer that contains the value to write |
| * @cnt: The maximum number of bytes to write to "file" |
| * @ppos: The current position in the debugfs "file" |
| * |
| * This function provides a write implementation for the "enable" debugfs |
| * interface to the hardware latency detector. Can be used to enable or |
| * disable the detector, which will have the side-effect of possibly |
| * also resetting the global stats and kicking off the measuring |
| * kthread (on an enable) or the converse (upon a disable). |
| */ |
| static ssize_t debug_enable_fwrite(struct file *filp, |
| const char __user *ubuf, |
| size_t cnt, |
| loff_t *ppos) |
| { |
| char buf[4]; |
| int csize = min(cnt, sizeof(buf)); |
| long val = 0; |
| int err = 0; |
| |
| memset(buf, '\0', sizeof(buf)); |
| if (copy_from_user(buf, ubuf, csize)) |
| return -EFAULT; |
| |
| buf[sizeof(buf)-1] = '\0'; /* just in case */ |
| err = strict_strtoul(buf, 10, &val); |
| if (0 != err) |
| return -EINVAL; |
| |
| if (val) { |
| if (enabled) |
| goto unlock; |
| enabled = 1; |
| __reset_stats(); |
| if (start_kthread()) |
| return -EFAULT; |
| } else { |
| if (!enabled) |
| goto unlock; |
| enabled = 0; |
| err = stop_kthread(); |
| if (err) { |
| printk(KERN_ERR BANNER "cannot stop kthread\n"); |
| return -EFAULT; |
| } |
| wake_up(&data.wq); /* reader(s) should return */ |
| } |
| unlock: |
| return csize; |
| } |
| |
/**
 * debug_max_fopen - Open function for "max" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Open handler for the "max" debugfs interface to the hardware latency
 * detector. Always succeeds and returns 0.
 */
static int debug_max_fopen(struct inode *inode, struct file *filp)
{
	/* nothing to set up for this entry */
	return 0;
}
| |
| /** |
| * debug_max_fread - Read function for "max" debugfs entry |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The userspace provided buffer to read value into |
| * @cnt: The maximum number of bytes to read |
| * @ppos: The current "file" position |
| * |
| * This function provides a read implementation for the "max" debugfs |
| * interface to the hardware latency detector. Can be used to determine |
| * the maximum latency value observed since it was last reset. |
| */ |
| static ssize_t debug_max_fread(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos) |
| { |
| return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample); |
| } |
| |
| /** |
| * debug_max_fwrite - Write function for "max" debugfs entry |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The user buffer that contains the value to write |
| * @cnt: The maximum number of bytes to write to "file" |
| * @ppos: The current position in the debugfs "file" |
| * |
| * This function provides a write implementation for the "max" debugfs |
| * interface to the hardware latency detector. Can be used to reset the |
| * maximum or set it to some other desired value - if, then, subsequent |
| * measurements exceed this value, the maximum will be updated. |
| */ |
| static ssize_t debug_max_fwrite(struct file *filp, |
| const char __user *ubuf, |
| size_t cnt, |
| loff_t *ppos) |
| { |
| return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample); |
| } |
| |
| |
| /** |
| * debug_sample_fopen - An open function for "sample" debugfs interface |
| * @inode: The in-kernel inode representation of this debugfs "file" |
| * @filp: The active open file structure for the debugfs "file" |
| * |
| * This function handles opening the "sample" file within the hardware |
| * latency detector debugfs directory interface. This file is used to read |
| * raw samples from the global ring_buffer and allows the user to see a |
| * running latency history. Can be opened blocking or non-blocking, |
| * affecting whether it behaves as a buffer read pipe, or does not. |
| * Implements simple locking to prevent multiple simultaneous use. |
| */ |
| static int debug_sample_fopen(struct inode *inode, struct file *filp) |
| { |
| if (!atomic_add_unless(&data.sample_open, 1, 1)) |
| return -EBUSY; |
| else |
| return 0; |
| } |
| |
| /** |
| * debug_sample_fread - A read function for "sample" debugfs interface |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The user buffer that will contain the samples read |
| * @cnt: The maximum bytes to read from the debugfs "file" |
| * @ppos: The current position in the debugfs "file" |
| * |
| * This function handles reading from the "sample" file within the hardware |
| * latency detector debugfs directory interface. This file is used to read |
| * raw samples from the global ring_buffer and allows the user to see a |
| * running latency history. By default this will block pending a new |
| * value written into the sample buffer, unless there are already a |
| * number of value(s) waiting in the buffer, or the sample file was |
| * previously opened in a non-blocking mode of operation. |
| */ |
| static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos) |
| { |
| int len = 0; |
| char buf[64]; |
| struct sample *sample = NULL; |
| |
| if (!enabled) |
| return 0; |
| |
| sample = kzalloc(sizeof(struct sample), GFP_KERNEL); |
| if (!sample) |
| return -ENOMEM; |
| |
| while (!buffer_get_sample(sample)) { |
| |
| DEFINE_WAIT(wait); |
| |
| if (filp->f_flags & O_NONBLOCK) { |
| len = -EAGAIN; |
| goto out; |
| } |
| |
| prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); |
| schedule(); |
| finish_wait(&data.wq, &wait); |
| |
| if (signal_pending(current)) { |
| len = -EINTR; |
| goto out; |
| } |
| |
| if (!enabled) { /* enable was toggled */ |
| len = 0; |
| goto out; |
| } |
| } |
| |
| len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n", |
| sample->timestamp.tv_sec, |
| sample->timestamp.tv_nsec, |
| sample->duration, |
| sample->outer_duration); |
| |
| |
| /* handling partial reads is more trouble than it's worth */ |
| if (len > cnt) |
| goto out; |
| |
| if (copy_to_user(ubuf, buf, len)) |
| len = -EFAULT; |
| |
| out: |
| kfree(sample); |
| return len; |
| } |
| |
| /** |
| * debug_sample_release - Release function for "sample" debugfs interface |
| * @inode: The in-kernel inode represenation of the debugfs "file" |
| * @filp: The active open file structure for the debugfs "file" |
| * |
| * This function completes the close of the debugfs interface "sample" file. |
| * Frees the sample_open "lock" so that other users may open the interface. |
| */ |
| static int debug_sample_release(struct inode *inode, struct file *filp) |
| { |
| atomic_dec(&data.sample_open); |
| |
| return 0; |
| } |
| |
/**
 * debug_threshold_fopen - Open function for "threshold" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Open handler for the "threshold" debugfs interface to the hardware
 * latency detector. Always succeeds and returns 0.
 */
static int debug_threshold_fopen(struct inode *inode, struct file *filp)
{
	/* nothing to set up for this entry */
	return 0;
}
| |
| /** |
| * debug_threshold_fread - Read function for "threshold" debugfs entry |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The userspace provided buffer to read value into |
| * @cnt: The maximum number of bytes to read |
| * @ppos: The current "file" position |
| * |
| * This function provides a read implementation for the "threshold" debugfs |
| * interface to the hardware latency detector. It can be used to determine |
| * the current threshold level at which a latency will be recorded in the |
| * global ring buffer, typically on the order of 10us. |
| */ |
| static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos) |
| { |
| return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); |
| } |
| |
| /** |
| * debug_threshold_fwrite - Write function for "threshold" debugfs entry |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The user buffer that contains the value to write |
| * @cnt: The maximum number of bytes to write to "file" |
| * @ppos: The current position in the debugfs "file" |
| * |
| * This function provides a write implementation for the "threshold" debugfs |
| * interface to the hardware latency detector. It can be used to configure |
| * the threshold level at which any subsequently detected latencies will |
| * be recorded into the global ring buffer. |
| */ |
| static ssize_t debug_threshold_fwrite(struct file *filp, |
| const char __user *ubuf, |
| size_t cnt, |
| loff_t *ppos) |
| { |
| int ret; |
| |
| ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); |
| |
| if (enabled) |
| wake_up_process(kthread); |
| |
| return ret; |
| } |
| |
/**
 * debug_width_fopen - Open function for "width" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * Open handler for the "width" debugfs interface to the hardware latency
 * detector. Always succeeds and returns 0.
 */
static int debug_width_fopen(struct inode *inode, struct file *filp)
{
	/* nothing to set up for this entry */
	return 0;
}
| |
| /** |
| * debug_width_fread - Read function for "width" debugfs entry |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The userspace provided buffer to read value into |
| * @cnt: The maximum number of bytes to read |
| * @ppos: The current "file" position |
| * |
| * This function provides a read implementation for the "width" debugfs |
| * interface to the hardware latency detector. It can be used to determine |
| * for how many us of the total window us we will actively sample for any |
| * hardware-induced latecy periods. Obviously, it is not possible to |
| * sample constantly and have the system respond to a sample reader, or, |
| * worse, without having the system appear to have gone out to lunch. |
| */ |
| static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos) |
| { |
| return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); |
| } |
| |
| /** |
| * debug_width_fwrite - Write function for "width" debugfs entry |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The user buffer that contains the value to write |
| * @cnt: The maximum number of bytes to write to "file" |
| * @ppos: The current position in the debugfs "file" |
| * |
| * This function provides a write implementation for the "width" debugfs |
| * interface to the hardware latency detector. It can be used to configure |
| * for how many us of the total window us we will actively sample for any |
| * hardware-induced latency periods. Obviously, it is not possible to |
| * sample constantly and have the system respond to a sample reader, or, |
| * worse, without having the system appear to have gone out to lunch. It |
| * is enforced that width is less than the total window size. |
| * |
| * The input is parsed as a base-10 unsigned 64-bit number. Input that does |
| * not fit in the local buffer together with its terminating NUL is rejected |
| * rather than being silently truncated and parsed as a different number. |
| * |
| * Returns the number of bytes consumed (@cnt) on success, -EFAULT if the |
| * user buffer cannot be read, or -EINVAL if the input is too long, is not a |
| * valid number, or is not smaller than the current sample window. |
| */ |
| static ssize_t debug_width_fwrite(struct file *filp, |
| const char __user *ubuf, |
| size_t cnt, |
| loff_t *ppos) |
| { |
| char buf[U64STR_SIZE]; |
| u64 val = 0; |
| int err; |
| |
| /* |
| * Leave room for the terminating NUL. Accepting a longer write would |
| * drop trailing characters and report a short write, so the remainder |
| * would be parsed later as a separate, unrelated value. |
| */ |
| if (cnt >= sizeof(buf)) |
| return -EINVAL; |
| |
| /* Zero-fill first so the copied text is always NUL-terminated. */ |
| memset(buf, '\0', sizeof(buf)); |
| if (copy_from_user(buf, ubuf, cnt)) |
| return -EFAULT; |
| |
| err = strict_strtoull(buf, 10, &val); |
| if (0 != err) |
| return -EINVAL; |
| |
| /* The width must stay strictly below the window; check under the lock. */ |
| mutex_lock(&data.lock); |
| if (val >= data.sample_window) { |
| mutex_unlock(&data.lock); |
| return -EINVAL; |
| } |
| data.sample_width = val; |
| mutex_unlock(&data.lock); |
| |
| /* Kick the sampling thread, presumably so it picks up the new width. */ |
| if (enabled) |
| wake_up_process(kthread); |
| |
| return cnt; |
| } |
| |
| /** |
| * debug_window_fopen - Open function for "window" debugfs entry |
| * @inode: The in-kernel inode representation of the debugfs "file" |
| * @filp: The active open file structure for the debugfs "file" |
| * |
| * Open handler for the "window" debugfs file of the hardware latency |
| * detector. The window is the total time, in us, that makes up one sample |
| * period; windows follow one another back-to-back and each contains a |
| * sample width period during which sampling actually takes place. |
| * |
| * Neither argument is used and no per-open state is set up. |
| * |
| * Always returns 0. |
| */ |
| static int debug_window_fopen(struct inode *inode, struct file *filp) |
| { |
| /* Nothing to prepare for this entry. */ |
| return 0; |
| } |
| |
| /** |
| * debug_window_fread - Read function for "window" debugfs entry |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The userspace provided buffer to read value into |
| * @cnt: The maximum number of bytes to read |
| * @ppos: The current "file" position |
| * |
| * Read handler for the "window" debugfs file of the hardware latency |
| * detector; it reports the total window size. The window is the total |
| * time, in us, that makes up one sample period. Windows follow one another |
| * back-to-back and each contains a sample width period during which |
| * sampling actually takes place. |
| * |
| * Returns whatever simple_data_read() returns for the window value. |
| */ |
| static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, |
| size_t cnt, loff_t *ppos) |
| { |
| ssize_t nread; |
| |
| /* Hand the current window size to the shared simple_data_read() helper. */ |
| nread = simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); |
| |
| return nread; |
| } |
| |
| /** |
| * debug_window_fwrite - Write function for "window" debugfs entry |
| * @filp: The active open file structure for the debugfs "file" |
| * @ubuf: The user buffer that contains the value to write |
| * @cnt: The maximum number of bytes to write to "file" |
| * @ppos: The current position in the debugfs "file" |
| * |
| * This function provides a write implementation for the "window" debugfs |
| * interface to the hardware latency detector. The window is the total time |
| * in us that will be considered one sample period. Conceptually, windows |
| * occur back-to-back and contain a sample width period during which |
| * actual sampling occurs. Can be used to write a new total window size. It |
| * is enforced that any value written must be greater than the sample width |
| * size, or an error results. |
| * |
| * The input is parsed as a base-10 unsigned 64-bit number. Input that does |
| * not fit in the local buffer together with its terminating NUL is rejected |
| * rather than being silently truncated and parsed as a different number. |
| * |
| * Returns the number of bytes consumed (@cnt) on success, -EFAULT if the |
| * user buffer cannot be read, or -EINVAL if the input is too long, is not a |
| * valid number, or is not greater than the current sample width. |
| */ |
| static ssize_t debug_window_fwrite(struct file *filp, |
| const char __user *ubuf, |
| size_t cnt, |
| loff_t *ppos) |
| { |
| char buf[U64STR_SIZE]; |
| u64 val = 0; |
| int err; |
| |
| /* |
| * Leave room for the terminating NUL. Accepting a longer write would |
| * drop trailing characters and report a short write, so the remainder |
| * would be parsed later as a separate, unrelated value. |
| */ |
| if (cnt >= sizeof(buf)) |
| return -EINVAL; |
| |
| /* Zero-fill first so the copied text is always NUL-terminated. */ |
| memset(buf, '\0', sizeof(buf)); |
| if (copy_from_user(buf, ubuf, cnt)) |
| return -EFAULT; |
| |
| err = strict_strtoull(buf, 10, &val); |
| if (0 != err) |
| return -EINVAL; |
| |
| /* The window must stay strictly above the width; check under the lock. */ |
| mutex_lock(&data.lock); |
| if (val <= data.sample_width) { |
| mutex_unlock(&data.lock); |
| return -EINVAL; |
| } |
| data.sample_window = val; |
| mutex_unlock(&data.lock); |
| |
| return cnt; |
| } |
| |
| /* |
| * File operations backing the "count" debugfs file: open, read and write |
| */ |
| static const struct file_operations count_fops = { |
| .owner = THIS_MODULE, |
| .open = debug_count_fopen, |
| .read = debug_count_fread, |
| .write = debug_count_fwrite, |
| }; |
| |
| /* |
| * File operations backing the "enable" debugfs file: open, read and write |
| */ |
| static const struct file_operations enable_fops = { |
| .owner = THIS_MODULE, |
| .open = debug_enable_fopen, |
| .read = debug_enable_fread, |
| .write = debug_enable_fwrite, |
| }; |
| |
| /* |
| * File operations backing the "max" debugfs file: open, read and write |
| */ |
| static const struct file_operations max_fops = { |
| .owner = THIS_MODULE, |
| .open = debug_max_fopen, |
| .read = debug_max_fread, |
| .write = debug_max_fwrite, |
| }; |
| |
| /* |
| * File operations backing the "sample" debugfs file: open, read and |
| * release only (there is no write handler for this entry) |
| */ |
| static const struct file_operations sample_fops = { |
| .owner = THIS_MODULE, |
| .open = debug_sample_fopen, |
| .read = debug_sample_fread, |
| .release = debug_sample_release, |
| }; |
| |
| /* |
| * File operations backing the "threshold" debugfs file: open, read and write |
| */ |
| static const struct file_operations threshold_fops = { |
| .owner = THIS_MODULE, |
| .open = debug_threshold_fopen, |
| .read = debug_threshold_fread, |
| .write = debug_threshold_fwrite, |
| }; |
| |
| /* |
| * File operations backing the "width" debugfs file: open, read and write |
| */ |
| static const struct file_operations width_fops = { |
| .owner = THIS_MODULE, |
| .open = debug_width_fopen, |
| .read = debug_width_fread, |
| .write = debug_width_fwrite, |
| }; |
| |
| /* |
| * File operations backing the "window" debugfs file: open, read and write |
| */ |
| static const struct file_operations window_fops = { |
| .owner = THIS_MODULE, |
| .open = debug_window_fopen, |
| .read = debug_window_fread, |
| .write = debug_window_fwrite, |
| }; |
| |
| /** |
| * init_debugfs - A function to initialize the debugfs interface files |
| * |
| * Creates the DRVNAME directory in debugfs and, inside it, the "sample", |
| * "count", "max", "window", "width", "threshold" and "enable" files that |
| * expose the hardware latency detector's samples, statistics and settings. |
| * |
| * If any entry cannot be created, the entries created before it are removed |
| * again in reverse order of creation. |
| * |
| * Returns 0 on success or -ENOMEM if an entry could not be created. |
| */ |
| static int init_debugfs(void) |
| { |
| debug_dir = debugfs_create_dir(DRVNAME, NULL); |
| if (!debug_dir) |
| goto err_debug_dir; |
| |
| debug_sample = debugfs_create_file("sample", 0444, debug_dir, |
| NULL, &sample_fops); |
| if (!debug_sample) |
| goto err_sample; |
| |
| debug_count = debugfs_create_file("count", 0444, debug_dir, |
| NULL, &count_fops); |
| if (!debug_count) |
| goto err_count; |
| |
| debug_max = debugfs_create_file("max", 0444, debug_dir, |
| NULL, &max_fops); |
| if (!debug_max) |
| goto err_max; |
| |
| debug_sample_window = debugfs_create_file("window", 0644, debug_dir, |
| NULL, &window_fops); |
| if (!debug_sample_window) |
| goto err_window; |
| |
| debug_sample_width = debugfs_create_file("width", 0644, debug_dir, |
| NULL, &width_fops); |
| if (!debug_sample_width) |
| goto err_width; |
| |
| debug_threshold = debugfs_create_file("threshold", 0644, debug_dir, |
| NULL, &threshold_fops); |
| if (!debug_threshold) |
| goto err_threshold; |
| |
| /* "enable" is the only entry created with a data pointer (&enabled). */ |
| debug_enable = debugfs_create_file("enable", 0644, debug_dir, |
| &enabled, &enable_fops); |
| if (!debug_enable) |
| goto err_enable; |
| |
| return 0; |
| |
| /* Unwind: each label removes the entry created just before the failure. */ |
| err_enable: |
| debugfs_remove(debug_threshold); |
| err_threshold: |
| debugfs_remove(debug_sample_width); |
| err_width: |
| debugfs_remove(debug_sample_window); |
| err_window: |
| debugfs_remove(debug_max); |
| err_max: |
| debugfs_remove(debug_count); |
| err_count: |
| debugfs_remove(debug_sample); |
| err_sample: |
| debugfs_remove(debug_dir); |
| err_debug_dir: |
| return -ENOMEM; |
| } |
| |
| /** |
| * free_debugfs - A function to cleanup the debugfs file interface |
| * |
| * Removes every debugfs entry of the detector: the files first, then the |
| * directory that contains them. |
| */ |
| static void free_debugfs(void) |
| { |
| /* Removal order: files first, the containing directory last. */ |
| struct dentry *entries[] = { |
| debug_enable, |
| debug_threshold, |
| debug_sample_width, |
| debug_sample_window, |
| debug_max, |
| debug_count, |
| debug_sample, |
| debug_dir, |
| }; |
| unsigned int i; |
| |
| /* could also use a debugfs_remove_recursive */ |
| for (i = 0; i < sizeof(entries) / sizeof(entries[0]); i++) |
| debugfs_remove(entries[i]); |
| } |
| |
| /** |
| * detector_init - Standard module initialization code |
| * |
| * Prints the version banner, calls init_stats(), creates the debugfs |
| * interface and, if the detector is enabled, calls start_kthread(). |
| * |
| * Whatever was set up is torn down again when a later step fails. In |
| * particular, a start_kthread() failure removes the debugfs entries and |
| * frees the ring buffer, so a failed load does not leave debugfs files |
| * behind that refer to a module which never finished loading. |
| * |
| * Returns 0 on success or the error code of the step that failed. |
| */ |
| static int detector_init(void) |
| { |
| int ret; |
| |
| printk(KERN_INFO BANNER "version %s\n", VERSION); |
| |
| ret = init_stats(); |
| if (0 != ret) |
| return ret; |
| |
| ret = init_debugfs(); |
| if (0 != ret) |
| goto err_stats; |
| |
| if (enabled) { |
| ret = start_kthread(); |
| if (0 != ret) |
| goto err_debugfs; |
| } |
| |
| return 0; |
| |
| err_debugfs: |
| /* detector_exit() never runs after a failed init, so clean up here. */ |
| free_debugfs(); |
| err_stats: |
| ring_buffer_free(ring_buffer); |
| return ret; |
| } |
| |
| /** |
| * detector_exit - Standard module cleanup code |
| * |
| * If the detector is enabled, clears the enabled flag and calls |
| * stop_kthread(), logging an error if that fails. The debugfs interface |
| * and the ring buffer are then released. |
| */ |
| static void detector_exit(void) |
| { |
| if (enabled) { |
| enabled = 0; |
| if (stop_kthread()) |
| printk(KERN_ERR BANNER "cannot stop kthread\n"); |
| } |
| |
| free_debugfs(); |
| |
| /* free up the ring buffer */ |
| ring_buffer_free(ring_buffer); |
| } |
| |
| module_init(detector_init); /* register detector_init() as the module's init entry point */ |
| module_exit(detector_exit); /* register detector_exit() as the module's cleanup entry point */ |