| #!/usr/bin/env python3 |
| # Note: this script is python2 and python3 compatible. |
| |
| """ |
| fio_jsonplus_clat2csv |
| |
| This script converts fio's json+ latency data to CSV format. |
| |
| For example: |
| |
| Run the following fio jobs: |
| $ fio --output=fio-jsonplus.output --output-format=json+ --ioengine=null \ |
| --time_based --runtime=3s --size=1G --slat_percentiles=1 \ |
| --clat_percentiles=1 --lat_percentiles=1 \ |
| --name=test1 --rw=randrw \ |
| --name=test2 --rw=read \ |
| --name=test3 --rw=write |
| |
| Then run: |
| $ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv |
| |
| You will end up with the following 3 files: |
| |
| -rw-r--r-- 1 root root 77547 Mar 24 15:17 fio-jsonplus_job0.csv |
| -rw-r--r-- 1 root root 65413 Mar 24 15:17 fio-jsonplus_job1.csv |
| -rw-r--r-- 1 root root 63291 Mar 24 15:17 fio-jsonplus_job2.csv |
| |
| fio-jsonplus_job0.csv will look something like: |
| |
| nsec, read_slat_ns_count, read_slat_ns_cumulative, read_slat_ns_percentile, read_clat_ns_count, read_clat_ns_cumulative, read_clat_ns_percentile, read_lat_ns_count, read_lat_ns_cumulative, read_lat_ns_percentile, write_slat_ns_count, write_slat_ns_cumulative, write_slat_ns_percentile, write_clat_ns_count, write_clat_ns_cumulative, write_clat_ns_percentile, write_lat_ns_count, write_lat_ns_cumulative, write_lat_ns_percentile, trim_slat_ns_count, trim_slat_ns_cumulative, trim_slat_ns_percentile, trim_clat_ns_count, trim_clat_ns_cumulative, trim_clat_ns_percentile, trim_lat_ns_count, trim_lat_ns_cumulative, trim_lat_ns_percentile, |
| 12, , , , 3, 3, 6.11006798673e-07, , , , , , , 2, 2, 4.07580840603e-07, , , , , , , , , , , , , |
| 13, , , , 1364, 1367, 0.000278415431262, , , , , , , 1776, 1778, 0.000362339367296, , , , , , , , , , , , , |
| 14, , , , 181872, 183239, 0.037320091594, , , , , , , 207436, 209214, 0.0426358089929, , , , , , , , , , , , , |
| 15, , , , 1574811, 1758050, 0.358060167469, , , , , , , 1661435, 1870649, 0.381220345946, , , , , , , , , , , , , |
| 16, , , , 2198478, 3956528, 0.805821835713, , , , , , , 2154571, 4025220, 0.820301275606, , , , , , , , , , , , , |
| 17, , , , 724335, 4680863, 0.953346372218, , , , , , , 645351, 4670571, 0.951817627138, , , , , , , , , , , , , |
| 18, , , , 71837, 4752700, 0.96797733735, , , , , , , 61084, 4731655, 0.964265961171, , , , , , , , , , , , , |
| 19, , , , 15915, 4768615, 0.971218728417, , , , , , , 18419, 4750074, 0.968019576923, , , , , , , , , , , , , |
| 20, , , , 12651, 4781266, 0.973795344087, , , , , , , 14176, 4764250, 0.970908509921, , , , , , , , , , , , , |
| ... |
| 168960, , , , , , , , , , , , , 1, 4906999, 0.999999388629, 1, 4906997, 0.999998981048, , , , , , , , , , |
| 177152, , , , , , , , , , , , , 1, 4907000, 0.999999592419, 1, 4906998, 0.999999184838, , , , , , , , , , |
| 183296, , , , , , , , , , , , , 1, 4907001, 0.99999979621, 1, 4906999, 0.999999388629, , , , , , , , , , |
| 189440, , , , , , , 1, 4909925, 0.999999185324, , , , , , , , , , , , , , , , , , , |
| 214016, , , , 1, 4909928, 0.999999796331, 2, 4909927, 0.999999592662, , , , , , , , , , , , , , , , , , , |
| 246784, , , , , , , , , , , , , , , , 1, 4907000, 0.999999592419, , , , , , , , , , |
| 272384, , , , 1, 4909929, 1.0, 1, 4909928, 0.999999796331, , , , , , , , , , , , , , , , , , , |
| 329728, , , , , , , , , , , , , 1, 4907002, 1.0, 1, 4907001, 0.99999979621, , , , , , , , , , |
| 1003520, , , , , , , , , , , , , , , , 1, 4907002, 1.0, , , , , , , , , , |
| 1089536, , , , , , , 1, 4909929, 1.0, , , , , , , , , , , , , , , , , , , |
| |
| The first line says that there were three read IOs with 12ns clat, |
| the cumulative number of read IOs at or below 12ns was three, and |
| 12ns was the 0.0000611th percentile for read latency. There were |
| two write IOs with 12ns clat, the cumulative number of write IOs |
| at or below 12ns was two, and 12ns was the 0.0000408th percentile |
| for write latency. |
| |
| The job had one write IO complete at 168960ns and 4906999 write IOs |
| completed at or below this duration. Also this duration was the |
| 99.99994th percentile for write latency. There was one write IO |
| with a total latency of 168960ns, this duration had a cumulative |
| frequency of 4906997 write IOs and was the 99.9998981048th percentile |
| for write total latency. |
| |
| The last line says that one read IO had 1089536ns total latency, this |
| duration had a cumulative frequency of 4909929 and represented the 100th |
| percentile for read total latency. |
| |
| Running the following: |
| |
| $ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv --validate |
| fio-jsonplus_job0.csv validated |
| fio-jsonplus_job1.csv validated |
| fio-jsonplus_job2.csv validated |
| |
| will check the CSV data against the json+ output to confirm that the CSV |
| data matches. |
| """ |
| |
| from __future__ import absolute_import |
| from __future__ import print_function |
| import os |
| import json |
| import argparse |
| import itertools |
| import six |
| |
# Data directions looked up in each job of the json+ output; also the first
# component of every CSV column label.
DDIR_LIST = ["read", "write", "trim"]

# Latency types looked up under each data direction; also the second
# component of every CSV column label.
LAT_LIST = ["slat_ns", "clat_ns", "lat_ns"]
| |
def parse_args():
    """Build the command-line interface and parse sys.argv.

    Returns:
        The argparse namespace with the attributes 'source', 'dest',
        'debug' and 'validate'.
    """

    cli = argparse.ArgumentParser()

    # positional arguments
    cli.add_argument(
        'source',
        help='fio json+ output file containing completion '
             'latency data')
    cli.add_argument(
        'dest',
        help='destination file stub for latency data in CSV '
             'format. job number will be appended to filename')

    # optional flags
    cli.add_argument(
        '--debug', '-d',
        action='store_true',
        help='enable debug prints')
    cli.add_argument(
        '--validate',
        action='store_true',
        help='validate CSV against JSON output')

    return cli.parse_args()
| |
| |
def percentile(idx, run_total):
    """Compute the share of all samples at or below a given position.

    Parameters:
        idx         position in run_total to evaluate.
        run_total   list of cumulative sums; its last entry is the
                    overall total.

    Returns:
        run_total[idx] divided by the overall total, as a float fraction
        between 0 and 1 (not scaled to 100), or 0 when the overall total
        is zero.
    """

    grand_total = run_total[-1]
    if grand_total != 0:
        return float(run_total[idx]) / grand_total

    # nothing was counted, so avoid dividing by zero
    return 0
| |
| |
def more_bins(indices, bins):
    """Report whether any bin list still has unprocessed entries.

    Parameters:
        indices     a dict mapping each key to the next index to process
                    in the corresponding bin list.
        bins        a dict containing the bin lists, using the same keys.

    Returns:
        True if at least one index is still inside its bin list.
        False if every index points beyond the end of its respective
        bin list.
    """

    return any(indices[name] < len(bins[name]) for name in indices)
| |
| |
def debug_print(debug, *args):
    """Forward a message to print() when debugging is enabled.

    Parameters:
        debug       nothing is printed unless this is true.
        *args       positional arguments handed to print() unchanged.
    """

    if not debug:
        return

    print(*args)
| |
| |
def get_csvfile(dest, jobnum):
    """Derive the per-job CSV filename from the destination file stub.

    Parameters:
        dest        file specification for CSV filename.
        jobnum      job number.

    Returns:
        dest with '_job<jobnum>' inserted in front of its extension
        (appended at the end when dest has no extension).
    """

    base, extension = os.path.splitext(dest)
    return '{0}_job{1}{2}'.format(base, jobnum, extension)
| |
| |
def validate(args, jsondata, col_labels):
    """Validate CSV data against json+ output.

    This function checks the CSV data to make sure that it was correctly
    generated from the original json+ output. json+ 'bins' objects are
    constructed from the '*_count' columns of the CSV data and then
    compared to the corresponding objects in the json+ data.

    All checks are explicit comparisons rather than assert statements, so
    they remain active when Python is run with -O.

    Percentiles and cumulative counts are not checked.

    Parameters:
        args        command-line arguments for this script.
        jsondata    json+ output to compare against.
        col_labels  column labels for CSV data.

    Returns:
        0 if no mismatches found.
        1 if a count in a CSV file differs from (or is absent from) the
        json+ data; a 'mismatch:' line is printed first.

    Raises:
        AssertionError if a CSV header differs from col_labels, or if the
        complete set of bins rebuilt from a CSV file differs from the
        json+ data.
    """

    colnames = [c.strip() for c in col_labels.split(',')]

    # Resolve every '<ddir>_<lat>_ns_count' column to its position and its
    # json+ keys once, instead of searching colnames for every CSV cell.
    count_cols = []
    for position, col in enumerate(colnames):
        if 'count' in col:
            ddir, lat, _, _ = col.split('_')
            count_cols.append((position, ddir, lat + '_ns'))

    for jobnum, job_data in enumerate(jsondata['jobs']):
        csvfile = get_csvfile(args.dest, jobnum)

        with open(csvfile, 'r') as csvsource:
            csvlines = csvsource.read().split('\n')

        if csvlines[0] != col_labels:
            raise AssertionError(
                "{0}: column labels do not match".format(csvfile))
        debug_print(args.debug, 'col_labels match for', csvfile)

        # create 'bins' objects from the CSV data
        counts = {ddir: {lat: {} for lat in LAT_LIST} for ddir in DDIR_LIST}

        for line in csvlines[1:]:
            if line.strip() == "":
                continue
            values = line.split(',')
            # json+ bins are keyed by the duration as a string
            nsec = values[0]
            for position, ddir, lat in count_cols:
                val = values[position]
                if val.strip() == "":
                    continue
                count = int(val)
                counts[ddir][lat][nsec] = count
                # A bin that is missing from the json+ data is a mismatch
                # just like a bin with a different count.
                try:
                    expected = job_data[ddir][lat]['bins'][nsec]
                except (KeyError, TypeError):
                    expected = None
                if count != expected:
                    print("mismatch:", csvfile, ddir, lat, nsec, "ns")
                    return 1

        # compare 'bins' objects created from the CSV data
        # with corresponding 'bins' objects in the json+ output
        # (this also catches bins that are missing from the CSV file)
        for ddir in DDIR_LIST:
            for lat in LAT_LIST:
                if lat in job_data[ddir] and 'bins' in job_data[ddir][lat]:
                    if job_data[ddir][lat]['bins'] != counts[ddir][lat]:
                        raise AssertionError(
                            "{0}: {1} {2} bins do not match json+ data".format(
                                csvfile, ddir, lat))
                    debug_print(args.debug, csvfile, ddir, lat, "bins match")
                else:
                    if counts[ddir][lat] != {}:
                        raise AssertionError(
                            "{0}: {1} {2} has counts but json+ data has "
                            "no bins".format(csvfile, ddir, lat))
                    debug_print(args.debug, csvfile, ddir, lat, "bins empty")

        print(csvfile, "validated")

    return 0
| |
| |
def _running_totals(pairs):
    """Return the cumulative sums of the counts in [duration, count] pairs."""

    totals = []
    accumulated = 0
    for _, count in pairs:
        accumulated += count
        totals.append(accumulated)
    return totals


def main():
    """Starting point for this script.

    In standard mode, this script will generate CSV data from fio json+ output.
    In validation mode it will check to make sure that counts in CSV files
    match the counts in the json+ data.

    Returns:
        The result of validate() in validation mode, None otherwise.
    """

    args = parse_args()

    with open(args.source, 'r') as source:
        jsondata = json.load(source)

    ddir_lat_list = [ddir + '_' + lat
                     for ddir, lat in itertools.product(DDIR_LIST, LAT_LIST)]
    debug_print(args.debug, 'ddir_lat_list: ', ddir_lat_list)
    col_labels = 'nsec, ' + ''.join(
        "{0}_count, {0}_cumulative, {0}_percentile, ".format(ddir_lat)
        for ddir_lat in ddir_lat_list)
    debug_print(args.debug, 'col_labels: ', col_labels)

    if args.validate:
        return validate(args, jsondata, col_labels)

    for jobnum, job_data in enumerate(jsondata['jobs']):
        # For each ddir_lat pairing: bins holds [duration, count] pairs
        # sorted by duration, run_total the matching cumulative counts.
        bins = {}
        run_total = {}

        for ddir in DDIR_LIST:
            ddir_data = job_data[ddir]
            for lat in LAT_LIST:
                ddir_lat = ddir + '_' + lat
                if lat not in ddir_data or 'bins' not in ddir_data[lat]:
                    bins[ddir_lat] = []
                    run_total[ddir_lat] = []
                    debug_print(args.debug, 'job', jobnum, ddir_lat, 'not found')
                    continue

                debug_print(args.debug, 'job', jobnum, ddir_lat, 'processing')
                # json+ keys are strings; convert so durations sort numerically
                bins[ddir_lat] = sorted(
                    [[int(key), value] for key, value in
                     six.iteritems(ddir_data[lat]['bins'])],
                    key=lambda pair: pair[0])
                # also copes with a 'bins' object that is present but empty
                run_total[ddir_lat] = _running_totals(bins[ddir_lat])

        csvfile = get_csvfile(args.dest, jobnum)
        with open(csvfile, 'w') as output:
            output.write(col_labels + "\n")

            #
            # Have a counter for each ddir_lat pairing
            # In each round, pick the shortest remaining duration
            # and output a line with any values for that duration
            #
            indices = {x: 0 for x in ddir_lat_list}
            while more_bins(indices, bins):
                debug_print(args.debug, 'indices: ', indices)
                # more_bins() guarantees at least one candidate, so min()
                # never sees an empty sequence and no sentinel is needed.
                min_lat = min(bins[key][indices[key]][0]
                              for key in ddir_lat_list
                              if indices[key] < len(bins[key]))

                fields = ["{0}, ".format(min_lat)]
                for ddir_lat in ddir_lat_list:
                    idx = indices[ddir_lat]
                    if idx < len(bins[ddir_lat]) and \
                            bins[ddir_lat][idx][0] == min_lat:
                        count = bins[ddir_lat][idx][1]
                        cumulative = run_total[ddir_lat][idx]
                        ptile = percentile(idx, run_total[ddir_lat])
                        fields.append("{0}, {1}, {2}, ".format(
                            count, cumulative, ptile))
                        indices[ddir_lat] += 1
                    else:
                        # no samples of this kind at this duration
                        fields.append(", , , ")
                fields.append("\n")
                output.write(''.join(fields))

        print("{0} generated".format(csvfile))
| |
| |
if __name__ == '__main__':
    import sys

    # main() returns the result of validate() in validation mode (non-zero
    # on a mismatch) and None otherwise; pass it on as the exit status so
    # that callers can detect a failed validation.
    sys.exit(main())