| #!/usr/bin/python -tt |
| # |
| # rteval - script for evaluating platform suitability for RT Linux |
| # |
| # This program is used to determine the suitability of |
| # a system for use in a Real Time Linux environment. |
| # It starts up various system loads and measures event |
| # latency while the loads are running. A report is generated |
| # to show the latencies encountered during the run. |
| # |
| # Copyright 2009,2010,2011,2012 Clark Williams <williams@redhat.com> |
| # Copyright 2009,2010,2011,2012 David Sommerseth <davids@redhat.com> |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; either version 2 of the License, or |
| # (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program; if not, write to the Free Software |
| # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| # |
| # For the avoidance of doubt the "preferred form" of this code is one which |
| # is in an open unpatent encumbered format. Where cryptographic key signing |
| # forms part of the process of creating an executable the information |
| # including keys needed to generate an equivalently functional executable |
| # are deemed to be part of the source code. |
| # |
| |
| import sys |
| import os |
| import os.path |
| import time |
| import string |
| import threading |
| import subprocess |
| import socket |
| import optparse |
| import tempfile |
| import statvfs |
| import shutil |
| import signal |
| import rtevalclient |
| import ethtool |
| import xmlrpclib |
| import platform |
| import fnmatch |
| import glob |
| from datetime import datetime |
| from distutils import sysconfig |
| |
| # put local path at start of list to overide installed methods |
| sys.path.insert(0, "./rteval") |
| import util |
| import load |
| import cyclictest |
| import xmlout |
| import dmi |
| import rtevalConfig |
| import rtevalMailer |
| from cputopology import CPUtopology |
| |
| |
| pathSave={} |
| def getcmdpath(which): |
| """ |
| getcmdpath is a method which allows finding an executable in the PATH |
| directories to call it from full path |
| """ |
| if not pathSave.has_key(which): |
| for path in os.environ['PATH'].split(':'): |
| cmdfile = os.path.join(path, which) |
| if os.path.isfile(cmdfile) and os.access(cmdfile, os.X_OK): |
| pathSave[which] = cmdfile |
| break |
| if not pathSave[which]: |
| raise RuntimeError, "Command '%s' is unknown on this system" % which |
| return pathSave[which] |
| |
| |
| sigint_received = False |
| def sigint_handler(signum, frame): |
| global sigint_received |
| sigint_received = True |
| print "*** SIGINT received - stopping rteval run ***" |
| |
| def sigterm_handler(signum, frame): |
| raise RuntimeError, "SIGTERM received!" |
| |
| class RtEval(object): |
| def __init__(self, cmdargs): |
| self.version = "1.41" |
| self.load_modules = [] |
| self.workdir = os.getcwd() |
| self.reportdir = os.getcwd() |
| self.inifile = None |
| self.cmd_options = {} |
| self.start = datetime.now() |
| self.init = 'unknown' |
| |
| default_config = { |
| 'rteval': { |
| 'verbose' : False, |
| 'keepdata' : True, |
| 'debugging' : False, |
| 'duration' : '60', |
| 'sysreport' : False, |
| 'reportdir' : None, |
| 'reportfile' : None, |
| 'installdir' : '/usr/share/rteval', |
| 'srcdir' : '/usr/share/rteval/loadsource', |
| 'xmlrpc' : None, |
| 'xslt_report': '/usr/share/rteval/rteval_text.xsl', |
| 'report_interval': '600', |
| 'logging' : False, |
| }, |
| 'loads' : { |
| 'kcompile' : 'module', |
| 'hackbench' : 'module', |
| }, |
| 'kcompile' : { |
| 'source' : 'linux-2.6.39.tar.bz2', |
| 'jobspercore': '2', |
| }, |
| 'hackbench' : { |
| 'source' : 'hackbench.tar.bz2', |
| 'jobspercore': '5', |
| }, |
| 'cyclictest' : { |
| 'interval' : '100', |
| 'buckets' : '2000', |
| } |
| } |
| |
| # setup initial configuration |
| self.config = rtevalConfig.rtevalConfig(default_config, logfunc=self.info) |
| |
| # parse command line options |
| self.parse_options(cmdargs) |
| |
| # read in config file info |
| self.inifile = self.config.Load(self.cmd_options.inifile) |
| |
| # copy the command line options into the rteval config section |
| # (cmd line overrides config file values) |
| self.config.AppendConfig('rteval', self.cmd_options) |
| |
| if self.cmd_options.cyclictest_interval != None: |
| self.config.AppendConfig('cyclictest', { "interval":self.cmd_options.cyclictest_interval }) |
| |
| if self.cmd_options.cyclictest_distance != None: |
| self.config.AppendConfig('cyclictest', { "distance":self.cmd_options.cyclictest_distance }) |
| |
| if self.cmd_options.cyclictest_buckets != None: |
| self.config.AppendConfig('cyclictest', { "buckets":self.cmd_options.cyclictest_distance }) |
| |
| if self.cmd_options.cyclictest_priority != None: |
| self.config.AppendConfig('cyclictest', { "priority":self.cmd_options.cyclictest_priority }) |
| |
| if self.cmd_options.hackbench_jobspercore != None: |
| self.config.AppendConfig('hackbench', { "jobspercore":self.cmd_options.hackbench_jobspercore }) |
| |
| if self.cmd_options.kcompile_jobspercore != None: |
| self.config.AppendConfig('kcompile', { "jobspercore":self.cmd_options.kcompile_jobspercore }) |
| |
| self.debug("workdir: %s" % self.workdir) |
| |
| # prepare a mailer, if that's configured |
| if self.config.HasSection('smtp'): |
| self.mailer = rtevalMailer.rtevalMailer(self.config.GetSection('smtp')) |
| else: |
| self.mailer = None |
| |
| self.loads = [] |
| self.cputopology = None |
| self.numcores = None |
| self.memsize = None |
| self.current_clocksource = None |
| self.available_clocksource = None |
| self.services = None |
| self.kthreads = None |
| self.xml = None |
| self.baseos = "unknown" |
| self.annotate = self.cmd_options.annotate |
| |
| if not self.config.xslt_report.startswith(self.config.installdir): |
| self.config.xslt_report = os.path.join(self.config.installdir, "rteval_text.xsl") |
| |
| if not os.path.exists(self.config.xslt_report): |
| raise RuntimeError, "can't find XSL template (%s)!" % self.config.xslt_report |
| |
| # Add rteval directory into module search path |
| sys.path.insert(0, '%s/rteval' % sysconfig.get_python_lib()) |
| |
| # generate a set of "junk" characters to use for filtering later |
| self.junk = "" |
| for c in range(0, 0xff): |
| s = chr(c) |
| if s not in string.printable: |
| self.junk += s |
| self.transtable = string.maketrans("", "") |
| |
| # If --xmlrpc-submit is given, check that we can access the server |
| res = None |
| if self.config.xmlrpc: |
| self.debug("Checking if XML-RPC server '%s' is reachable" % self.config.xmlrpc) |
| attempt = 0 |
| warning_sent = False |
| ping_failed = False |
| while attempt < 6: |
| try: |
| client = rtevalclient.rtevalclient("http://%s/rteval/API1/" % self.config.xmlrpc) |
| res = client.Hello() |
| attempt = 10 |
| ping_failed = False |
| except xmlrpclib.ProtocolError: |
| # Server do not support Hello(), but is reachable |
| self.info("Got XML-RPC connection with %s but it did not support Hello()" |
| % self.config.xmlrpc) |
| res = None |
| except socket.error, err: |
| self.info("Could not establish XML-RPC contact with %s\n%s" |
| % (self.config.xmlrpc, str(err))) |
| |
| if (self.mailer is not None) and (not warning_sent): |
| self.mailer.SendMessage("[RTEVAL:WARNING] Failed to ping XML-RPC server", |
| "Server %s did not respond. Not giving up yet." |
| % self.config.xmlrpc) |
| warning_sent = True |
| |
| # Do attempts handling |
| attempt += 1 |
| if attempt > 5: |
| break # To avoid sleeping before we abort |
| |
| print "Failed pinging XML-RPC server. Doing another attempt(%i) " % attempt |
| time.sleep(attempt*15) # Incremental sleep - sleep attempts*15 seconds |
| ping_failed = True |
| |
| if ping_failed: |
| if not self.cmd_options.xmlrpc_noabort: |
| print "ERROR: Could not reach XML-RPC server '%s'. Aborting." % self.config.xmlrpc |
| sys.exit(2) |
| else: |
| print "WARNING: Could not ping the XML-RPC server. Will continue anyway." |
| |
| if res: |
| self.info("Verified XML-RPC connection with %s (XML-RPC API version: %i)" |
| % (res["server"], res["APIversion"])) |
| self.debug("Recieved greeting: %s" % res["greeting"]) |
| |
| |
| def get_cpu_topology(self): |
| ''' figure out how many processors we have available''' |
| |
| topology = CPUtopology() |
| topology.parse() |
| |
| self.numcores = topology.getCPUcores(True) |
| self.debug("counted %d cores (%d online) and %d sockets" % |
| (topology.getCPUcores(False), self.numcores, |
| topology.getCPUsockets())) |
| return topology.getXMLdata() |
| |
| def __get_services_sysvinit(self): |
| reject = ('functions', 'halt', 'killall', 'single', 'linuxconf', 'kudzu', |
| 'skeleton', 'README', '*.dpkg-dist', '*.dpkg-old', 'rc', 'rcS', |
| 'single', 'reboot', 'bootclean.sh') |
| for sdir in ('/etc/init.d', '/etc/rc.d/init.d'): |
| if os.path.isdir(sdir): |
| servicesdir = sdir |
| break |
| if not servicesdir: |
| raise RuntimeError, "No services dir (init.d) found on your system" |
| self.debug("Services located in %s, going through each service file to check status" % servicesdir) |
| ret_services = {} |
| for service in glob.glob(os.path.join(servicesdir, '*')): |
| servicename = os.path.basename(service) |
| if not [1 for p in reject if fnmatch.fnmatch(servicename, p)] and os.access(service, os.X_OK): |
| cmd = '%s -qs "\(^\|\W\)status)" %s' % (getcmdpath('grep'), service) |
| c = subprocess.Popen(cmd, shell=True) |
| c.wait() |
| if c.returncode == 0: |
| cmd = ['env', '-i', 'LANG="%s"' % os.environ['LANG'], 'PATH="%s"' % os.environ['PATH'], 'TERM="%s"' % os.environ['TERM'], service, 'status'] |
| c = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
| c.wait() |
| if c.returncode == 0 and (c.stdout.read() or c.stderr.read()): |
| ret_services[servicename] = 'running' |
| else: |
| ret_services[servicename] = 'not running' |
| else: |
| ret_services[servicename] = 'unknown' |
| return ret_services |
| |
| def __get_services_systemd(self): |
| ret_services = {} |
| cmd = '%s list-unit-files -t service --no-legend' % getcmdpath('systemctl') |
| self.debug("cmd: %s" % cmd) |
| c = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
| for p in c.stdout: |
| # p are lines like "servicename.service status" |
| v = p.strip().split() |
| ret_services[v[0].split('.')[0]] = v[1] |
| return ret_services |
| |
| def get_services(self): |
| cmd = [getcmdpath('ps'), '-ocomm=', '1'] |
| c = subprocess.Popen(cmd, stdout=subprocess.PIPE) |
| self.init = c.stdout.read().strip() |
| if self.init == 'systemd': |
| self.debug("Using systemd to get services status") |
| return self.__get_services_systemd() |
| elif self.init == 'init': |
| self.init = 'sysvinit' |
| self.debug("Using sysvinit to get services status") |
| return self.__get_services_sysvinit() |
| else: |
| raise RuntimeError, "Unknown init system (%s)" % self.init |
| return {} |
| |
| def get_kthreads(self): |
| policies = {'FF':'fifo', 'RR':'rrobin', 'TS':'other', '?':'unknown' } |
| ret_kthreads = {} |
| self.debug("getting kthread status") |
| cmd = '%s -eocommand,pid,policy,rtprio,comm' % getcmdpath('ps') |
| self.debug("cmd: %s" % cmd) |
| c = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) |
| for p in c.stdout: |
| v = p.strip().split() |
| kcmd = v.pop(0) |
| try: |
| if int(v[0]) > 0 and kcmd.startswith('[') and kcmd.endswith(']'): |
| ret_kthreads[v[0]] = {'policy' : policies[v[1]], |
| 'priority' : v[2], 'name' : v[3] } |
| except ValueError: |
| pass # Ignore lines which don't have a number in the first row |
| return ret_kthreads |
| |
| def get_modules(self): |
| modlist = [] |
| try: |
| fp = open('/proc/modules', 'r') |
| line = fp.readline() |
| while line: |
| mod = line.split() |
| modlist.append({"modname": mod[0], |
| "modsize": mod[1], |
| "numusers": mod[2], |
| "usedby": mod[3], |
| "modstate": mod[4]}) |
| line = fp.readline() |
| fp.close() |
| except Exception, err: |
| raise err |
| return modlist |
| |
| def parse_options(self, cmdargs): |
| '''parse the command line arguments''' |
| parser = optparse.OptionParser() |
| parser.add_option("-d", "--duration", dest="duration", |
| type="string", default=self.config.duration, |
| help="specify length of test run (default: %default)") |
| parser.add_option("-v", "--verbose", dest="verbose", |
| action="store_true", default=self.config.verbose, |
| help="turn on verbose prints (default: %default)") |
| parser.add_option("-w", "--workdir", dest="workdir", |
| type="string", default=self.workdir, |
| help="top directory for rteval data (default: %default)") |
| parser.add_option("-l", "--loaddir", dest="srcdir", |
| type="string", default=self.config.srcdir, |
| help="directory for load source tarballs (default: %default)") |
| parser.add_option("-i", "--installdir", dest="installdir", |
| type="string", default=self.config.installdir, |
| help="place to locate installed templates (default: %default)") |
| parser.add_option("-s", "--sysreport", dest="sysreport", |
| action="store_true", default=self.config.sysreport, |
| help='run sysreport to collect system data (default: %default)') |
| parser.add_option("-D", '--debug', dest='debugging', |
| action='store_true', default=self.config.debugging, |
| help='turn on debug prints (default: %default)') |
| parser.add_option("-X", '--xmlrpc-submit', dest='xmlrpc', |
| action='store', default=self.config.xmlrpc, metavar='HOST', |
| help='Hostname to XML-RPC server to submit reports') |
| parser.add_option("-P", "--xmlrpc-no-abort", dest="xmlrpc_noabort", |
| action='store_true', default=False, |
| help="Do not abort if XML-RPC server do not respond to ping request"); |
| parser.add_option("-Z", '--summarize', dest='summarize', |
| action='store_true', default=False, |
| help='summarize an already existing XML report') |
| parser.add_option("-H", '--raw-histogram', dest='rawhistogram', |
| action='store_true', default=False, |
| help='Generate raw histogram data for an already existing XML report') |
| parser.add_option("-f", "--inifile", dest="inifile", |
| type='string', default=None, |
| help="initialization file for configuring loads and behavior") |
| parser.add_option("-a", "--annotate", dest="annotate", |
| type="string", default=None, |
| help="Add a little annotation which is stored in the report") |
| parser.add_option("-L", "--logging", dest="logging", |
| action='store_true', default=False, |
| help='log the output of the loads in the report directory') |
| parser.add_option("-O", "--onlyload", dest="onlyload", |
| action='store_true', default=False, |
| help="only run the loads (don't run measurement threads)") |
| |
| # module options |
| parser.add_option("", "--cyclictest-interval", dest="cyclictest_interval", |
| action="store", type="int", |
| help="cyclictest measurement interval in microseconds") |
| parser.add_option("", "--cyclictest-distance", dest="cyclictest_distance", |
| action="store", type="int", |
| help="cyclictest measurement interval increment in microseconds") |
| parser.add_option("", "--cyclictest-buckets", dest="cyclictest_buckets", |
| action="store", type="int", |
| help="number of cyclictest 1 microsecond histogram buckets") |
| parser.add_option("", "--cyclictest-priority", dest="cyclictest_priority", |
| action="store", type="int", |
| help="SCHED_FIFO priority of measurement threads") |
| |
| parser.add_option("", "--hackbench-jobspercore", dest="hackbench_jobspercore", |
| action="store", type="int", |
| help="number of hackbench jobs per-core") |
| parser.add_option("", "--kcompile-jobspercore", dest="kcompile_jobspercore", |
| action="store", type="int", |
| help="number of kernel compile jobs per-core") |
| |
| |
| (self.cmd_options, self.cmd_arguments) = parser.parse_args(args = cmdargs) |
| if self.cmd_options.duration: |
| mult = 1.0 |
| v = self.cmd_options.duration.lower() |
| if v.endswith('s'): |
| v = v[:-1] |
| elif v.endswith('m'): |
| v = v[:-1] |
| mult = 60.0 |
| elif v.endswith('h'): |
| v = v[:-1] |
| mult = 3600.0 |
| elif v.endswith('d'): |
| v = v[:-1] |
| mult = 3600.0 * 24.0 |
| self.cmd_options.duration = float(v) * mult |
| self.workdir = os.path.abspath(self.cmd_options.workdir) |
| |
| |
| def debug(self, str): |
| if self.config.debugging is True: |
| print "rteval: %s" % str |
| |
| def info(self, str): |
| if self.config.verbose is True: |
| print str |
| |
| def run_sysreport(self): |
| import glob |
| if os.path.exists('/usr/sbin/sosreport'): |
| exe = '/usr/sbin/sosreport' |
| elif os.path.exists('/usr/sbin/sysreport'): |
| exe = '/usr/sbin/sysreport' |
| else: |
| raise RuntimeError, "Can't find sosreport/sysreport" |
| |
| self.debug("report tool: %s" % exe) |
| options = ['-k', 'rpm.rpmva=off', |
| '--name=rteval', |
| '--batch', |
| '--no-progressbar'] |
| |
| self.info("Generating SOS report") |
| self.info("using command %s" % " ".join([exe]+options)) |
| subprocess.call([exe] + options) |
| for s in glob.glob('/tmp/s?sreport-rteval-*'): |
| self.debug("moving %s to %s" % (s, self.reportdir)) |
| shutil.move(s, self.reportdir) |
| |
| |
| def genxml(self, duration, accum, samples, xslt = None): |
| seconds = duration.seconds |
| hours = seconds / 3600 |
| if hours: seconds -= (hours * 3600) |
| minutes = seconds / 60 |
| if minutes: seconds -= (minutes * 60) |
| (sys, node, release, ver, machine) = os.uname() |
| |
| # Start new XML report |
| self.xmlreport = xmlout.XMLOut('rteval', self.version) |
| self.xmlreport.NewReport() |
| |
| self.xmlreport.openblock('run_info', {'days': duration.days, |
| 'hours': hours, |
| 'minutes': minutes, |
| 'seconds': seconds}) |
| self.xmlreport.taggedvalue('date', self.start.strftime('%Y-%m-%d')) |
| self.xmlreport.taggedvalue('time', self.start.strftime('%H:%M:%S')) |
| if self.annotate: |
| self.xmlreport.taggedvalue('annotate', self.annotate) |
| self.xmlreport.closeblock() |
| self.xmlreport.openblock('uname') |
| self.xmlreport.taggedvalue('node', node) |
| isrt = 1 |
| if ver.find(' RT ') == -1: |
| isrt = 0 |
| self.xmlreport.taggedvalue('kernel', release, {'is_RT':isrt}) |
| self.xmlreport.taggedvalue('arch', machine) |
| self.xmlreport.taggedvalue('baseos', self.baseos) |
| self.xmlreport.closeblock() |
| |
| self.xmlreport.openblock("clocksource") |
| self.xmlreport.taggedvalue('current', self.current_clocksource) |
| self.xmlreport.taggedvalue('available', self.available_clocksource) |
| self.xmlreport.closeblock() |
| |
| self.xmlreport.openblock('hardware') |
| self.xmlreport.AppendXMLnodes(self.cputopology) |
| self.xmlreport.taggedvalue('numa_nodes', self.numanodes) |
| self.xmlreport.taggedvalue('memory_size', "%.3f" % self.memsize[0], {"unit": self.memsize[1]}) |
| self.xmlreport.closeblock() |
| |
| self.xmlreport.openblock('services', {'init': self.init}) |
| for s in self.services: |
| self.xmlreport.taggedvalue("service", self.services[s], {"name": s}) |
| self.xmlreport.closeblock() |
| |
| keys = self.kthreads.keys() |
| if len(keys): |
| keys.sort() |
| self.xmlreport.openblock('kthreads') |
| for pid in keys: |
| self.xmlreport.taggedvalue('thread', self.kthreads[pid]['name'], |
| { 'policy' : self.kthreads[pid]['policy'], |
| 'priority' : self.kthreads[pid]['priority'], |
| }) |
| self.xmlreport.closeblock() |
| |
| modlist = util.get_modules() |
| if len(modlist): |
| self.xmlreport.openblock('kernelmodules') |
| for mod in modlist: |
| self.xmlreport.openblock('module') |
| self.xmlreport.taggedvalue('info', mod['modname'], |
| {'size': mod['modsize'], |
| 'state': mod['modstate'], |
| 'numusers': mod['numusers']}) |
| if mod['usedby'] != '-': |
| self.xmlreport.openblock('usedby') |
| for ub in mod['usedby'].split(','): |
| if len(ub): |
| self.xmlreport.taggedvalue('module', ub, None) |
| self.xmlreport.closeblock() |
| self.xmlreport.closeblock() |
| self.xmlreport.closeblock() |
| |
| # |
| # Retrieve configured IP addresses |
| # |
| self.xmlreport.openblock('network_config') |
| |
| # Get the interface name for the IPv4 default gw |
| route = open('/proc/net/route') |
| defgw4 = None |
| if route: |
| rl = route.readline() |
| while rl != '' : |
| rl = route.readline() |
| splt = rl.split("\t") |
| # Only catch default route |
| if len(splt) > 2 and splt[2] != '00000000' and splt[1] == '00000000': |
| defgw4 = splt[0] |
| break |
| route.close() |
| |
| # Make an interface tag for each device found |
| if hasattr(ethtool, 'get_interfaces_info'): |
| # Using the newer python-ethtool API (version >= 0.4) |
| for dev in ethtool.get_interfaces_info(ethtool.get_devices()): |
| if cmp(dev.device,'lo') == 0: |
| continue |
| |
| self.xmlreport.openblock('interface', |
| {'device': dev.device, |
| 'hwaddr': dev.mac_address} |
| ) |
| |
| # Protcol configurations |
| if dev.ipv4_address: |
| self.xmlreport.openblock('IPv4', |
| {'ipaddr': dev.ipv4_address, |
| 'netmask': dev.ipv4_netmask, |
| 'broadcast': dev.ipv4_broadcast, |
| 'defaultgw': (defgw4 == dev.device) and '1' or '0'} |
| ) |
| self.xmlreport.closeblock() |
| |
| for ip6 in dev.get_ipv6_addresses(): |
| self.xmlreport.openblock('IPv6', |
| {'ipaddr': ip6.address, |
| 'netmask': ip6.netmask, |
| 'scope': ip6.scope} |
| ) |
| self.xmlreport.closeblock() |
| self.xmlreport.closeblock() |
| else: # Fall back to older python-ethtool API (version < 0.4) |
| ifdevs = ethtool.get_active_devices() |
| ifdevs.remove('lo') |
| ifdevs.sort() |
| |
| for dev in ifdevs: |
| self.xmlreport.openblock('interface', |
| {'device': dev, |
| 'hwaddr': ethtool.get_hwaddr(dev)} |
| ) |
| self.xmlreport.openblock('IPv4', |
| {'ipaddr': ethtool.get_ipaddr(dev), |
| 'netmask': ethtool.get_netmask(dev), |
| 'defaultgw': (defgw4 == dev) and '1' or '0'} |
| ) |
| self.xmlreport.closeblock() |
| self.xmlreport.closeblock() |
| self.xmlreport.closeblock() |
| |
| self.xmlreport.openblock('loads', {'load_average':str(accum / samples)}) |
| for load in self.loads: |
| load.genxml(self.xmlreport) |
| self.xmlreport.closeblock() |
| self.cyclictest.genxml(self.xmlreport) |
| |
| # now generate the dmidecode data for this host |
| d = dmi.DMIinfo(self.config.GetSection('rteval')) |
| d.genxml(self.xmlreport) |
| |
| # Close the report - prepare for return the result |
| self.xmlreport.close() |
| |
| # Write XML (or write XSLT parsed XML if xslt != None) |
| if self.xml != None: |
| self.xmlreport.Write(self.xml, xslt) |
| else: |
| # If no file is set, use stdout |
| self.xmlreport.Write("-", xslt) # libxml2 defines a filename as "-" to be stdout |
| |
| |
| def report(self): |
| "Create a screen report, based on a predefined XSLT template" |
| self.xmlreport.Write("-", self.config.xslt_report) |
| |
| def XMLreport(self): |
| "Retrieves the complete rteval XML report as a libxml2.xmlDoc object" |
| return self.xmlreport.GetXMLdocument() |
| |
| def show_report(self, xmlfile, xsltfile): |
| '''summarize a previously generated xml file''' |
| print "Loading %s for summarizing" % xmlfile |
| |
| xsltfullpath = os.path.join(self.config.installdir, xsltfile) |
| if not os.path.exists(xsltfullpath): |
| raise RuntimeError, "can't find XSL template (%s)!" % xsltfullpath |
| |
| xmlreport = xmlout.XMLOut('rteval', self.version) |
| xmlreport.LoadReport(xmlfile) |
| xmlreport.Write('-', xsltfullpath) |
| del xmlreport |
| |
| def start_loads(self): |
| if len(self.loads) == 0: |
| raise RuntimeError, "start_loads: No loads defined!" |
| self.info ("starting loads:") |
| for l in self.loads: |
| l.start() |
| # now wait until they're all ready |
| self.info("waiting for ready from all loads") |
| ready=False |
| while not ready: |
| busy = 0 |
| for l in self.loads: |
| if not l.isAlive(): |
| raise RuntimeError, "%s died" % l.name |
| if not l.isReady(): |
| busy += 1 |
| self.debug("waiting for %s" % l.name) |
| if busy: |
| time.sleep(1.0) |
| else: |
| ready = True |
| |
| def stop_loads(self): |
| if len(self.loads) == 0: |
| raise RuntimeError, "stop_loads: No loads defined!" |
| self.info("stopping loads: ") |
| for l in self.loads: |
| self.info("\t%s" % l.name) |
| l.stopevent.set() |
| l.join(2.0) |
| |
| def make_report_dir(self): |
| t = self.start |
| i = 1 |
| self.reportdir = os.path.join(self.workdir, |
| t.strftime("rteval-%Y%m%d-"+str(i))) |
| while os.path.exists(self.reportdir): |
| i += 1 |
| self.reportdir = os.path.join(self.workdir, |
| t.strftime('rteval-%Y%m%d-'+str(i))) |
| if not os.path.isdir(self.reportdir): |
| os.mkdir(self.reportdir) |
| os.mkdir(os.path.join(self.reportdir, "logs")) |
| return self.reportdir |
| |
| def get_dmesg(self): |
| dpath = "/var/log/dmesg" |
| if not os.path.exists(dpath): |
| print "dmesg file not found at %s" % dpath |
| return |
| shutil.copyfile(dpath, os.path.join(self.reportdir, "dmesg")) |
| |
| |
| def show_remaining_time(self, remaining): |
| r = int(remaining) |
| days = r / 86400 |
| if days: r = r - (days * 86400) |
| hours = r / 3600 |
| if hours: r = r - (hours * 3600) |
| minutes = r / 60 |
| if minutes: r = r - (minutes * 60) |
| print "rteval time remaining: %d days, %d hours, %d minutes, %d seconds" % (days, hours, minutes, r) |
| |
| |
| def measure(self): |
| # Collect misc system info |
| self.baseos = util.get_base_os() |
| self.cputopology = self.get_cpu_topology() |
| self.numanodes = util.get_num_nodes() |
| self.memsize = util.get_memory_size() |
| (self.current_clocksource, self.available_clocksource) = util.get_clocksources() |
| self.services = self.get_services() |
| self.kthreads = self.get_kthreads() |
| |
| onlyload = self.cmd_options.onlyload |
| |
| builddir = os.path.join(self.workdir, 'rteval-build') |
| if not os.path.isdir(builddir): os.mkdir(builddir) |
| self.reportfile = os.path.join(self.reportdir, "summary.rpt") |
| self.xml = os.path.join(self.reportdir, "summary.xml") |
| |
| # read in loads from the ini file |
| self.load_modules = [] |
| loads = self.config.GetSection("loads") |
| for l in loads: |
| # hope to eventually have different kinds but module is only on |
| # for now (jcw) |
| if l[1].lower() == 'module': |
| self.info("importing load module %s" % l[0]) |
| self.load_modules.append(__import__(l[0])) |
| |
| self.info("setting up loads") |
| self.loads = [] |
| params = {'workdir':self.workdir, |
| 'reportdir':self.reportdir, |
| 'builddir':builddir, |
| 'srcdir':self.config.srcdir, |
| 'verbose': self.config.verbose, |
| 'debugging': self.config.debugging, |
| 'numcores':self.numcores, |
| 'logging':self.config.logging, |
| 'memsize':self.memsize, |
| 'numanodes':self.numanodes, |
| 'duration':self.config.duration, |
| } |
| |
| for m in self.load_modules: |
| self.config.AppendConfig(m.__name__, params) |
| self.info("creating load instance for %s" % m.__name__) |
| self.loads.append(m.create(self.config.GetSection(m.__name__))) |
| |
| if not onlyload: |
| self.config.AppendConfig('cyclictest', params) |
| self.info("setting up cyclictest") |
| self.cyclictest = cyclictest.Cyclictest(params=self.config.GetSection('cyclictest')) |
| |
| nthreads = 0 |
| try: |
| # start the loads |
| self.start_loads() |
| |
| print "rteval run on %s started at %s" % (os.uname()[2], time.asctime()) |
| print "started %d loads on %d cores" % (len(self.loads), self.numcores), |
| if self.numanodes > 1: |
| print " with %d numa nodes" % self.numanodes |
| else: |
| print "" |
| print "Run duration: %d seconds" % self.config.duration |
| |
| start = datetime.now() |
| |
| if not onlyload: |
| # start the cyclictest thread |
| self.info("starting cyclictest") |
| self.cyclictest.start() |
| |
| # turn loose the loads |
| self.info("sending start event to all loads") |
| for l in self.loads: |
| l.startevent.set() |
| nthreads += 1 |
| |
| accum = 0.0 |
| samples = 0 |
| |
| report_interval = int(self.config.GetSection('rteval').report_interval) |
| |
| # wait for time to expire or thread to die |
| signal.signal(signal.SIGINT, sigint_handler) |
| signal.signal(signal.SIGTERM, sigterm_handler) |
| self.info("waiting for duration (%f)" % self.config.duration) |
| stoptime = (time.time() + self.config.duration) |
| currtime = time.time() |
| rpttime = currtime + report_interval |
| loadcount = 5 |
| while (currtime <= stoptime) and not sigint_received: |
| time.sleep(1.0) |
| if not onlyload and not self.cyclictest.isAlive(): |
| raise RuntimeError, "cyclictest thread died!" |
| if len(threading.enumerate()) < nthreads: |
| raise RuntimeError, "load thread died!" |
| if not loadcount: |
| # open the loadavg /proc entry |
| p = open("/proc/loadavg") |
| load = float(p.readline().split()[0]) |
| p.close() |
| accum += load |
| samples += 1 |
| loadcount = 5 |
| #self.debug("current loadavg: %f, running avg: %f (load: %f, samples: %d)" % \ |
| # (load, accum/samples, load, samples)) |
| else: |
| loadcount -= 1 |
| if currtime >= rpttime: |
| left_to_run = stoptime - currtime |
| self.show_remaining_time(left_to_run) |
| rpttime = currtime + report_interval |
| print "load average: %.2f" % (accum / samples) |
| currtime = time.time() |
| self.debug("out of measurement loop") |
| signal.signal(signal.SIGINT, signal.SIG_DFL) |
| signal.signal(signal.SIGTERM, signal.SIG_DFL) |
| |
| except RuntimeError, e: |
| print "Runtime error during measurement: %s", e |
| raise |
| |
| finally: |
| if not onlyload: |
| # stop cyclictest |
| self.cyclictest.stopevent.set() |
| |
| # stop the loads |
| self.stop_loads() |
| |
| print "stopping run at %s" % time.asctime() |
| if not onlyload: |
| # wait for cyclictest to finish calculating stats |
| self.cyclictest.finished.wait() |
| self.genxml(datetime.now() - start, accum, samples) |
| self.report() |
| if self.config.sysreport: |
| self.run_sysreport() |
| |
| |
| def XMLRPC_Send(self): |
| "Sends the report to a given XML-RPC host. Returns 0 on success or 2 on submission failure." |
| |
| if not self.config.xmlrpc: |
| return 2 |
| |
| url = "http://%s/rteval/API1/" % self.config.xmlrpc |
| attempt = 0 |
| exitcode = 2 # Presume failure |
| warning_sent = False |
| while attempt < 6: |
| try: |
| client = rtevalclient.rtevalclient(url) |
| print "Submitting report to %s" % url |
| rterid = client.SendReport(self.xmlreport.GetXMLdocument()) |
| print "Report registered with submission id %i" % rterid |
| attempt = 10 |
| exitcode = 0 # Success |
| except socket.error: |
| if (self.mailer is not None) and (not warning_sent): |
| self.mailer.SendMessage("[RTEVAL:WARNING] Failed to submit report to XML-RPC server", |
| "Server %s did not respond. Not giving up yet." |
| % self.config.xmlrpc) |
| warning_sent = True |
| |
| attempt += 1 |
| if attempt > 5: |
| break # To avoid sleeping before we abort |
| |
| print "Failed sending report. Doing another attempt(%i) " % attempt |
| time.sleep(attempt*5*60) # Incremental sleep - sleep attempts*5 minutes |
| |
| except Exception, err: |
| raise err |
| |
| if (self.mailer is not None): |
| # Send final result messages |
| if exitcode == 2: |
| self.mailer.SendMessage("[RTEVAL:FAILURE] Failed to submit report to XML-RPC server", |
| "Server %s did not respond at all after %i attempts." |
| % (self.config.xmlrpc, attempt - 1)) |
| elif (exitcode == 0) and warning_sent: |
| self.mailer.SendMessage("[RTEVAL:SUCCESS] XML-RPC server available again", |
| "Succeeded to submit the report to %s in the end." |
| % (self.config.xmlrpc)) |
| return exitcode |
| |
| |
| def tar_results(self): |
| if not os.path.isdir(self.reportdir): |
| raise RuntimeError, "no such directory: %s" % self.reportdir |
| import tarfile |
| dirname = os.path.dirname(self.reportdir) |
| rptdir = os.path.basename(self.reportdir) |
| cwd = os.getcwd() |
| os.chdir(dirname) |
| try: |
| t = tarfile.open(rptdir + ".tar.bz2", "w:bz2") |
| t.add(rptdir) |
| t.close() |
| except: |
| os.chdir(cwd) |
| |
| def summarize(self, file): |
| isarchive = False |
| summary = file |
| if file.endswith(".tar.bz2"): |
| import tarfile |
| try: |
| t = tarfile.open(file) |
| except: |
| print "Don't know how to summarize %s (tarfile open failed)" % file |
| return |
| element = None |
| for f in t.getnames(): |
| if f.find('summary.xml') != -1: |
| element = f |
| break |
| if element == None: |
| print "No summary.xml found in tar archive %s" % file |
| return |
| tmp = tempfile.gettempdir() |
| self.debug("extracting %s from %s for summarizing" % (element, file)) |
| t.extract(element, path=tmp) |
| summary = os.path.join(tmp, element) |
| isarchive = True |
| self.show_report(summary, 'rteval_text.xsl') |
| if isarchive: |
| os.unlink(summary) |
| |
| def rteval(self): |
| ''' main function for rteval''' |
| retval = 0; |
| |
| # Parse initial DMI decoding errors |
| dmi.ProcessWarnings() |
| |
| # if --summarize was specified then just parse the XML, print it and exit |
| if self.cmd_options.summarize or self.cmd_options.rawhistogram: |
| if len(self.cmd_arguments) < 1: |
| raise RuntimeError, "Must specify at least one XML file with --summarize!" |
| |
| for x in self.cmd_arguments: |
| if self.cmd_options.summarize: |
| self.summarize(x) |
| elif self.cmd_options.rawhistogram: |
| self.show_report(x, 'rteval_histogram_raw.xsl') |
| |
| sys.exit(0) |
| |
| if os.getuid() != 0: |
| print "Must be root to run rteval!" |
| sys.exit(-1) |
| |
| self.debug('''rteval options: |
| workdir: %s |
| loaddir: %s |
| reportdir: %s |
| verbose: %s |
| debugging: %s |
| logging: %s |
| duration: %f |
| sysreport: %s |
| inifile: %s''' % (self.workdir, self.config.srcdir, self.reportdir, self.config.verbose, |
| self.config.debugging, self.config.logging, self.config.duration, |
| self.config.sysreport, self.inifile)) |
| |
| if not os.path.isdir(self.workdir): |
| raise RuntimeError, "work directory %d does not exist" % self.workdir |
| |
| # create our report directory |
| try: |
| self.make_report_dir() |
| except: |
| print "Cannot create the report dir!" |
| print "(is this an NFS filesystem with rootsquash turned on?)" |
| sys.exit(-1) |
| |
| self.measure() |
| |
| # if --xmlrpc-submit | -X was given, send our report to this host |
| if self.config.xmlrpc: |
| retval = self.XMLRPC_Send() |
| |
| self.get_dmesg() |
| self.tar_results() |
| |
| return retval |
| |
| if __name__ == '__main__': |
| import pwd, grp |
| |
| try: |
| # Parse initial DMI decoding errors |
| dmi.ProcessWarnings() |
| |
| rteval = RtEval(sys.argv[1:]) |
| ec = rteval.rteval() |
| rteval.debug("exiting with exit code: %d" % ec) |
| sys.exit(ec) |
| except KeyboardInterrupt: |
| sys.exit(0) |