rteval/rteval.py - pub/scm/linux/kernel/git/clrkwllms/rteval - Git at Google

 #!/usr/bin/python -tt
 #
 #   rteval - script for evaluating platform suitability for RT Linux
 #
 #           This program is used to determine the suitability of
 #           a system for use in a Real Time Linux environment.
 #           It starts up various system loads and measures event
 #           latency while the loads are running. A report is generated
 #           to show the latencies encountered during the run.
 #
 #   Copyright 2009,2010,2011,2012   Clark Williams <williams@redhat.com>
 #   Copyright 2009,2010,2011,2012   David Sommerseth <davids@redhat.com>
 #
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
 #   the Free Software Foundation; either version 2 of the License, or
 #   (at your option) any later version.
 #
 #   This program is distributed in the hope that it will be useful,
 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #   GNU General Public License for more details.
 #
 #   You should have received a copy of the GNU General Public License
 #   along with this program; if not, write to the Free Software
 #   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 #
 #   For the avoidance of doubt the "preferred form" of this code is one which
 #   is in an open unpatent encumbered format. Where cryptographic key signing
 #   forms part of the process of creating an executable the information
 #   including keys needed to generate an equivalently functional executable
 #   are deemed to be part of the source code.
 #

 import sys
 import os
 import os.path
 import time
 import string
 import threading
 import subprocess
 import socket
 import optparse
 import tempfile
 import statvfs
 import shutil
 import signal
 import rtevalclient
 import ethtool
 import xmlrpclib
 import platform
 import fnmatch
 import glob
 from datetime import datetime
 from distutils import sysconfig

 # put local path at start of list to override installed methods
 sys.path.insert(0, "./rteval")
 import util
 import load
 import cyclictest
 import xmlout
 import dmi
 import rtevalConfig
 import rtevalMailer
 from cputopology import CPUtopology


 pathSave={}
 def getcmdpath(which):
     """
     Find an executable in the PATH directories and return its full path.

     Successful lookups are cached in the module-level pathSave dictionary,
     so each command name is only searched for once.

     which -- name of the command to look up (for example 'ps')

     Raises RuntimeError if no executable file with that name exists in any
     of the PATH directories.
     """
     if which not in pathSave:
         for path in os.environ.get('PATH', '').split(':'):
             cmdfile = os.path.join(path, which)
             if os.path.isfile(cmdfile) and os.access(cmdfile, os.X_OK):
                 pathSave[which] = cmdfile
                 break
         else:
             # The loop ran to completion without finding the command.
             # Nothing was cached, so looking the key up in pathSave here
             # would fail with a KeyError rather than this explicit error.
             raise RuntimeError("Command '%s' is unknown on this system" % which)
     return pathSave[which]


 sigint_received = False
 def sigint_handler(signum, frame):
     """
     Signal handler for SIGINT: record the request and tell the user.

     Sets the module-level sigint_received flag to True and prints a
     notice.  The signum and frame arguments are not used.
     """
     global sigint_received
     sigint_received = True
     print("*** SIGINT received - stopping rteval run ***")

 def sigterm_handler(signum, frame):
     """
     Signal handler for SIGTERM: abort by raising RuntimeError.

     The signum and frame arguments are not used.
     """
     raise RuntimeError("SIGTERM received!")

 class RtEval(object):
     def __init__(self, cmdargs):
         """
         Prepare an rteval run.

         Builds the configuration from the built-in defaults, the config
         file and the command line (command line values override the config
         file), sets up the optional mailer and, when an XML-RPC server is
         configured, checks that the server can be reached.

         cmdargs -- list of command line arguments, passed to parse_options()

         Raises RuntimeError if the XSL report template does not exist.
         Exits the program with status 2 if the XML-RPC server cannot be
         reached and --xmlrpc-no-abort was not given.
         """
         self.version = "1.41"
         self.load_modules = []
         self.workdir = os.getcwd()
         self.reportdir = os.getcwd()
         self.inifile = None
         self.cmd_options = {}
         self.start = datetime.now()
         self.init = 'unknown'

         default_config = {
             'rteval': {
                 'verbose'    : False,
                 'keepdata'   : True,
                 'debugging'  : False,
                 'duration'   : '60',
                 'sysreport'  : False,
                 'reportdir'  : None,
                 'reportfile' : None,
                 'installdir' : '/usr/share/rteval',
                 'srcdir'     : '/usr/share/rteval/loadsource',
                 'xmlrpc'     : None,
                 'xslt_report': '/usr/share/rteval/rteval_text.xsl',
                 'report_interval': '600',
                 'logging'    : False,
                 },
             'loads' : {
                 'kcompile'   : 'module',
                 'hackbench'  : 'module',
                 },
             'kcompile' : {
                 'source'     : 'linux-2.6.39.tar.bz2',
                 'jobspercore': '2',
                 },
             'hackbench' : {
                 'source'     : 'hackbench.tar.bz2',
                 'jobspercore': '5',
                 },
             'cyclictest' : {
                 'interval' : '100',
                 'buckets'  : '2000',
                 }
             }

         # setup initial configuration
         self.config = rtevalConfig.rtevalConfig(default_config, logfunc=self.info)

         # parse command line options
         self.parse_options(cmdargs)

         # read in config file info
         self.inifile = self.config.Load(self.cmd_options.inifile)

         # copy the command line options into the rteval config section
         # (cmd line overrides config file values)
         self.config.AppendConfig('rteval', self.cmd_options)

         # Per-module command line options: (config section, key, value).
         # A value of None means the option was not given on the command
         # line, so the section keeps whatever it already holds.
         module_overrides = (
             ('cyclictest', 'interval',    self.cmd_options.cyclictest_interval),
             ('cyclictest', 'distance',    self.cmd_options.cyclictest_distance),
             # buckets must come from --cyclictest-buckets, not from the
             # distance option
             ('cyclictest', 'buckets',     self.cmd_options.cyclictest_buckets),
             ('cyclictest', 'priority',    self.cmd_options.cyclictest_priority),
             ('hackbench',  'jobspercore', self.cmd_options.hackbench_jobspercore),
             ('kcompile',   'jobspercore', self.cmd_options.kcompile_jobspercore),
             )
         for (section, key, value) in module_overrides:
             if value is not None:
                 self.config.AppendConfig(section, { key: value })

         self.debug("workdir: %s" % self.workdir)

         # prepare a mailer, if that's configured
         if self.config.HasSection('smtp'):
             self.mailer = rtevalMailer.rtevalMailer(self.config.GetSection('smtp'))
         else:
             self.mailer = None

         self.loads = []
         self.cputopology = None
         self.numcores = None
         self.memsize = None
         self.current_clocksource = None
         self.available_clocksource = None
         self.services = None
         self.kthreads = None
         self.xml = None
         self.baseos = "unknown"
         self.annotate = self.cmd_options.annotate

         # The report template is always taken from the install directory
         if not self.config.xslt_report.startswith(self.config.installdir):
             self.config.xslt_report = os.path.join(self.config.installdir, "rteval_text.xsl")

         if not os.path.exists(self.config.xslt_report):
             raise RuntimeError("can't find XSL template (%s)!" % self.config.xslt_report)

         # Add rteval directory into module search path
         sys.path.insert(0, '%s/rteval' % sysconfig.get_python_lib())

         # generate a set of "junk" characters to use for filtering later
         # (range(0, 0xff) covers character codes 0x00 through 0xfe)
         self.junk = "".join([chr(c) for c in range(0, 0xff)
                              if chr(c) not in string.printable])
         self.transtable = string.maketrans("", "")

         # If --xmlrpc-submit is given, check that we can access the server
         res = None
         if self.config.xmlrpc:
             self.debug("Checking if XML-RPC server '%s' is reachable" % self.config.xmlrpc)
             attempt = 0
             warning_sent = False
             ping_failed = False
             while attempt < 6:
                 try:
                     client = rtevalclient.rtevalclient("http://%s/rteval/API1/" % self.config.xmlrpc)
                     res = client.Hello()
                     ping_failed = False
                     break
                 except xmlrpclib.ProtocolError:
                     # Server does not support Hello(), but is reachable
                     self.info("Got XML-RPC connection with %s but it did not support Hello()"
                               % self.config.xmlrpc)
                     res = None
                     # The server answered, so the ping succeeded.  Leave the
                     # loop here; retrying would never advance the attempt
                     # counter and would therefore never terminate.
                     ping_failed = False
                     break
                 except socket.error:
                     # sys.exc_info() is used instead of binding the exception
                     # in the except clause, as that form is accepted by every
                     # Python version.
                     err = sys.exc_info()[1]
                     self.info("Could not establish XML-RPC contact with %s\n%s"
                               % (self.config.xmlrpc, str(err)))

                     # Only one warning mail is sent, on the first failure
                     if (self.mailer is not None) and (not warning_sent):
                         self.mailer.SendMessage("[RTEVAL:WARNING] Failed to ping XML-RPC server",
                                                 "Server %s did not respond.  Not giving up yet."
                                                 % self.config.xmlrpc)
                         warning_sent = True

                     # Do attempts handling
                     attempt += 1
                     if attempt > 5:
                         break # To avoid sleeping before we abort

                     print("Failed pinging XML-RPC server.  Doing another attempt(%i) " % attempt)
                     time.sleep(attempt*15) # Incremental sleep - sleep attempts*15 seconds
                     ping_failed = True

             if ping_failed:
                 if not self.cmd_options.xmlrpc_noabort:
                     print("ERROR: Could not reach XML-RPC server '%s'.  Aborting." % self.config.xmlrpc)
                     sys.exit(2)
                 else:
                     print("WARNING: Could not ping the XML-RPC server.  Will continue anyway.")

             if res:
                 self.info("Verified XML-RPC connection with %s (XML-RPC API version: %i)"
                           % (res["server"], res["APIversion"]))
                 self.debug("Recieved greeting: %s" % res["greeting"])


     def get_cpu_topology(self):
         '''
         Collect the CPU topology of this machine.

         Stores the result of getCPUcores(True) in self.numcores (reported
         as the "online" count in the debug message) and returns the XML
         data produced by the CPUtopology object.
         '''
         cpus = CPUtopology()
         cpus.parse()

         online = cpus.getCPUcores(True)
         self.numcores = online

         total = cpus.getCPUcores(False)
         sockets = cpus.getCPUsockets()
         self.debug("counted %d cores (%d online) and %d sockets" % (total, online, sockets))

         return cpus.getXMLdata()

     def __get_services_sysvinit(self):
         """
         Collect the status of SysV init services.

         Every executable script in the init.d directory which is not on the
         reject list is checked.  Scripts which do not appear to implement a
         "status" action are reported as 'unknown'; the others are run with
         the 'status' argument and reported as 'running' when they exit with
         0 and print something, otherwise as 'not running'.

         Returns a dictionary mapping service name to status string.
         Raises RuntimeError if no init.d directory is found.
         """
         reject = ('functions', 'halt', 'killall', 'single', 'linuxconf', 'kudzu',
                   'skeleton', 'README', '*.dpkg-dist', '*.dpkg-old', 'rc', 'rcS',
                   'single', 'reboot', 'bootclean.sh')

         # Must be initialised: if neither directory exists the loop never
         # assigns it, and the check below has to raise the RuntimeError.
         servicesdir = None
         for sdir in ('/etc/init.d', '/etc/rc.d/init.d'):
             if os.path.isdir(sdir):
                 servicesdir = sdir
                 break
         if not servicesdir:
             raise RuntimeError("No services dir (init.d) found on your system")
         self.debug("Services located in %s, going through each service file to check status" % servicesdir)

         # The status scripts run in a minimal environment.  No shell is
         # involved, so the values must not be wrapped in quotes (the quotes
         # would become part of the value), and variables which are not set
         # (as can happen for LANG or TERM) are simply left out.
         env_cmd = ['env', '-i']
         for var in ('LANG', 'PATH', 'TERM'):
             if var in os.environ:
                 env_cmd.append('%s=%s' % (var, os.environ[var]))

         ret_services = {}
         for service in glob.glob(os.path.join(servicesdir, '*')):
             servicename = os.path.basename(service)
             rejected = [p for p in reject if fnmatch.fnmatch(servicename, p)]
             if rejected or not os.access(service, os.X_OK):
                 continue

             # Does the script contain something looking like a "status)"
             # case label?  Passing an argument list (no shell) keeps
             # unusual file names from being interpreted by a shell.
             has_status = subprocess.call([getcmdpath('grep'), '-qs',
                                           '\\(^\\|\\W\\)status)', service])
             if has_status != 0:
                 ret_services[servicename] = 'unknown'
                 continue

             c = subprocess.Popen(env_cmd + [service, 'status'],
                                  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
             # communicate() drains both pipes while waiting; calling wait()
             # first can block forever if the script fills a pipe buffer.
             (out, err) = c.communicate()
             if c.returncode == 0 and (out or err):
                 ret_services[servicename] = 'running'
             else:
                 ret_services[servicename] = 'not running'
         return ret_services

     def __get_services_systemd(self):
         """
         Collect the state of systemd service units.

         Runs "systemctl list-unit-files -t service --no-legend" and returns
         a dictionary mapping the unit name (without its ".service" suffix)
         to the state printed in the second column.
         """
         suffix = '.service'
         ret_services = {}
         cmd = [getcmdpath('systemctl'), 'list-unit-files', '-t', 'service', '--no-legend']
         self.debug("cmd: %s" % " ".join(cmd))
         c = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         # communicate() drains both pipes and reaps the child process
         (out, err) = c.communicate()
         for p in out.splitlines():
             # p are lines like "servicename.service status"
             v = p.strip().split()
             if len(v) < 2:
                 continue    # blank or malformed line
             name = v[0]
             # Only strip the unit type suffix.  Cutting at the first dot
             # would turn names such as "dbus-org.freedesktop.login1.service"
             # into "dbus-org" and make different units overwrite each other.
             if name.endswith(suffix):
                 name = name[:-len(suffix)]
             ret_services[name] = v[1]
         return ret_services

     def get_services(self):
         """
         Detect the init system and return the status of its services.

         The command name of PID 1 decides which collector is used.
         self.init is set to 'systemd' or 'sysvinit' accordingly; for any
         other init system it holds the command name of PID 1.

         Returns a dictionary mapping service name to status.
         Raises RuntimeError if the init system is not recognised.
         """
         cmd = [getcmdpath('ps'), '-ocomm=',  '1']
         c = subprocess.Popen(cmd, stdout=subprocess.PIPE)
         # communicate() reads the output and waits for ps to exit
         self.init = c.communicate()[0].strip()

         if self.init == 'systemd':
             self.debug("Using systemd to get services status")
             return self.__get_services_systemd()

         if self.init == 'init':
             self.init = 'sysvinit'
             self.debug("Using sysvinit to get services status")
             return self.__get_services_sysvinit()

         raise RuntimeError("Unknown init system (%s)" % self.init)

     def get_kthreads(self):
         """
         Collect scheduling information about kernel threads.

         Parses the output of "ps -eocommand,pid,policy,rtprio,comm" and
         keeps the entries whose command is wrapped in square brackets.

         Returns a dictionary keyed by PID (as a string); each value is a
         dictionary with the keys 'policy', 'priority' and 'name'.
         """
         policies = {'FF':'fifo', 'RR':'rrobin', 'TS':'other', '?':'unknown' }
         ret_kthreads = {}
         self.debug("getting kthread status")
         cmd = [getcmdpath('ps'), '-eocommand,pid,policy,rtprio,comm']
         self.debug("cmd: %s" % " ".join(cmd))
         c = subprocess.Popen(cmd, stdout=subprocess.PIPE)
         # communicate() reads all output and waits for ps to exit
         out = c.communicate()[0]
         for p in out.splitlines():
             v = p.strip().split()
             if len(v) < 5:
                 continue    # blank or truncated line, nothing to parse
             kcmd = v.pop(0)
             try:
                 pid = int(v[0])
             except ValueError:
                 continue    # Ignore lines which don't have a number in the PID column
             if pid > 0 and kcmd.startswith('[') and kcmd.endswith(']'):
                 # Scheduling classes missing from the table above are
                 # reported as 'unknown' instead of failing with a KeyError
                 ret_kthreads[v[0]] = {'policy' : policies.get(v[1], 'unknown'),
                                       'priority' : v[2], 'name' : v[3] }
         return ret_kthreads

     def get_modules(self):
         """
         Return the loaded kernel modules as listed in /proc/modules.

         Each list entry is a dictionary with the keys 'modname', 'modsize',
         'numusers', 'usedby' and 'modstate', taken from the first five
         columns of a line.  Errors from opening or reading the file are
         passed on to the caller.
         """
         modlist = []
         fp = open('/proc/modules', 'r')
         try:
             line = fp.readline()
             while line:
                 mod = line.split()
                 # Lines with fewer than five columns cannot be parsed
                 if len(mod) >= 5:
                     modlist.append({"modname": mod[0],
                                     "modsize": mod[1],
                                     "numusers": mod[2],
                                     "usedby": mod[3],
                                     "modstate": mod[4]})
                 line = fp.readline()
         finally:
             # Close the file even when reading or parsing fails
             fp.close()
         return modlist

     def parse_options(self, cmdargs):
         '''parse the command line arguments'''
         parser = optparse.OptionParser()
         parser.add_option("-d", "--duration", dest="duration",
                           type="string", default=self.config.duration,
                           help="specify length of test run (default: %default)")
         parser.add_option("-v", "--verbose", dest="verbose",
                           action="store_true", default=self.config.verbose,
                           help="turn on verbose prints (default: %default)")
         parser.add_option("-w", "--workdir", dest="workdir",
                           type="string", default=self.workdir,
                           help="top directory for rteval data (default: %default)")
         parser.add_option("-l", "--loaddir", dest="srcdir",
                           type="string", default=self.config.srcdir,
                           help="directory for load source tarballs (default: %default)")
         parser.add_option("-i", "--installdir", dest="installdir",
                           type="string", default=self.config.installdir,
                           help="place to locate installed templates (default: %default)")
         parser.add_option("-s", "--sysreport", dest="sysreport",
                           action="store_true", default=self.config.sysreport,
                           help='run sysreport to collect system data (default: %default)')
         parser.add_option("-D", '--debug', dest='debugging',
                           action='store_true', default=self.config.debugging,
                           help='turn on debug prints (default: %default)')
         parser.add_option("-X", '--xmlrpc-submit', dest='xmlrpc',
                           action='store', default=self.config.xmlrpc, metavar='HOST',
                           help='Hostname to XML-RPC server to submit reports')
         parser.add_option("-P", "--xmlrpc-no-abort", dest="xmlrpc_noabort",
                           action='store_true', default=False,
                           help="Do not abort if XML-RPC server do not respond to ping request");
         parser.add_option("-Z", '--summarize', dest='summarize',
                           action='store_true', default=False,
                           help='summarize an already existing XML report')
         parser.add_option("-H", '--raw-histogram', dest='rawhistogram',
                           action='store_true', default=False,
                           help='Generate raw histogram data for an already existing XML report')
         parser.add_option("-f", "--inifile", dest="inifile",
                           type='string', default=None,
                           help="initialization file for configuring loads and behavior")
         parser.add_option("-a", "--annotate", dest="annotate",
                           type="string", default=None,
                           help="Add a little annotation which is stored in the report")
         parser.add_option("-L", "--logging", dest="logging",
                          action='store_true', default=False,
                          help='log the output of the loads in the report directory')
         parser.add_option("-O", "--onlyload", dest="onlyload",
                           action='store_true', default=False,
                           help="only run the loads (don't run measurement threads)")

         # module options
         parser.add_option("", "--cyclictest-interval", dest="cyclictest_interval",
                           action="store", type="int",
                           help="cyclictest measurement interval in microseconds")
         parser.add_option("", "--cyclictest-distance", dest="cyclictest_distance",
                           action="store", type="int",
                           help="cyclictest measurement interval increment in microseconds")
         parser.add_option("", "--cyclictest-buckets", dest="cyclictest_buckets",
                           action="store", type="int",
                           help="number of cyclictest 1 microsecond histogram buckets")
         parser.add_option("", "--cyclictest-priority", dest="cyclictest_priority",
                           action="store", type="int",
                           help="SCHED_FIFO priority of measurement threads")

         parser.add_option("", "--hackbench-jobspercore", dest="hackbench_jobspercore",
                           action="store", type="int",
                           help="number of hackbench jobs per-core")
         parser.add_option("", "--kcompile-jobspercore", dest="kcompile_jobspercore",
                           action="store", type="int",
                           help="number of kernel compile jobs per-core")


         (self.cmd_options, self.cmd_arguments) = parser.parse_args(args = cmdargs)
         if self.cmd_options.duration:
             mult = 1.0
             v = self.cmd_options.duration.lower()
             if v.endswith('s'):
                 v = v[:-1]
             elif v.endswith('m'):
                 v = v[:-1]
                 mult = 60.0
             elif v.endswith('h'):
                 v = v[:-1]
                 mult = 3600.0
             elif v.endswith('d'):
                 v = v[:-1]
                 mult = 3600.0 * 24.0
             self.cmd_options.duration = float(v) * mult
         self.workdir = os.path.abspath(self.cmd_options.workdir)


     def debug(self, str):
         if self.config.debugging is True:
             print "rteval: %s" % str

     def info(self, str):
         if self.config.verbose is True:
             print str

     def run_sysreport(self):
         """
         Run sosreport (or sysreport) and move its output to the report dir.

         /usr/sbin/sosreport is preferred over /usr/sbin/sysreport.  After
         the tool has run, everything matching /tmp/s?sreport-rteval-* is
         moved into self.reportdir.

         Raises RuntimeError if neither tool is installed.
         """
         exe = None
         for candidate in ('/usr/sbin/sosreport', '/usr/sbin/sysreport'):
             if os.path.exists(candidate):
                 exe = candidate
                 break
         if exe is None:
             raise RuntimeError("Can't find sosreport/sysreport")

         self.debug("report tool: %s" % exe)
         cmdline = [exe,
                    '-k', 'rpm.rpmva=off',
                    '--name=rteval',
                    '--batch',
                    '--no-progressbar']

         self.info("Generating SOS report")
         self.info("using command %s" % " ".join(cmdline))
         subprocess.call(cmdline)

         for archive in glob.glob('/tmp/s?sreport-rteval-*'):
             self.debug("moving %s to %s" % (archive, self.reportdir))
             shutil.move(archive, self.reportdir)


     def genxml(self, duration, accum, samples, xslt = None):
         """
         Build the XML report for a run and write it out.

         duration -- elapsed run time; its days and seconds attributes are
                     used (presumably a datetime.timedelta - confirm
                     against the caller)
         accum, samples -- the quotient accum / samples is reported as the
                     'load_average' attribute of the loads block
         xslt     -- optional XSLT template handed to XMLOut.Write()

         The report object is kept in self.xmlreport.  The result is written
         to the file named by self.xml, or to stdout when self.xml is None.
         """
         # Split the seconds part of the duration into hours/minutes/seconds
         (hours, seconds) = divmod(duration.seconds, 3600)
         (minutes, seconds) = divmod(seconds, 60)
         # The first field is named sysname so the sys module is not shadowed
         (sysname, node, release, ver, machine) = os.uname()

         # Start new XML report
         self.xmlreport = xmlout.XMLOut('rteval', self.version)
         self.xmlreport.NewReport()

         self.xmlreport.openblock('run_info', {'days': duration.days,
                                  'hours': hours,
                                  'minutes': minutes,
                                  'seconds': seconds})
         self.xmlreport.taggedvalue('date', self.start.strftime('%Y-%m-%d'))
         self.xmlreport.taggedvalue('time', self.start.strftime('%H:%M:%S'))
         if self.annotate:
             self.xmlreport.taggedvalue('annotate', self.annotate)
         self.xmlreport.closeblock()

         self.xmlreport.openblock('uname')
         self.xmlreport.taggedvalue('node', node)
         # The kernel is flagged as RT when the version string contains " RT "
         isrt = 1
         if ver.find(' RT ') == -1:
             isrt = 0
         self.xmlreport.taggedvalue('kernel', release, {'is_RT':isrt})
         self.xmlreport.taggedvalue('arch', machine)
         self.xmlreport.taggedvalue('baseos', self.baseos)
         self.xmlreport.closeblock()

         self.xmlreport.openblock("clocksource")
         self.xmlreport.taggedvalue('current', self.current_clocksource)
         self.xmlreport.taggedvalue('available', self.available_clocksource)
         self.xmlreport.closeblock()

         self.xmlreport.openblock('hardware')
         self.xmlreport.AppendXMLnodes(self.cputopology)
         self.xmlreport.taggedvalue('numa_nodes', self.numanodes)
         self.xmlreport.taggedvalue('memory_size', "%.3f" % self.memsize[0], {"unit": self.memsize[1]})
         self.xmlreport.closeblock()

         self.xmlreport.openblock('services', {'init': self.init})
         for s in self.services:
             self.xmlreport.taggedvalue("service", self.services[s], {"name": s})
         self.xmlreport.closeblock()

         keys = sorted(self.kthreads.keys())
         if keys:
             self.xmlreport.openblock('kthreads')
             for pid in keys:
                 self.xmlreport.taggedvalue('thread', self.kthreads[pid]['name'],
                                            { 'policy' : self.kthreads[pid]['policy'],
                                              'priority' : self.kthreads[pid]['priority'],
                                              })
             self.xmlreport.closeblock()

         modlist = util.get_modules()
         if len(modlist):
             self.xmlreport.openblock('kernelmodules')
             for mod in modlist:
                 self.xmlreport.openblock('module')
                 self.xmlreport.taggedvalue('info', mod['modname'],
                                            {'size': mod['modsize'],
                                             'state': mod['modstate'],
                                             'numusers': mod['numusers']})
                 if mod['usedby'] != '-':
                     self.xmlreport.openblock('usedby')
                     for ub in mod['usedby'].split(','):
                         if len(ub):
                             self.xmlreport.taggedvalue('module', ub, None)
                     self.xmlreport.closeblock()
                 self.xmlreport.closeblock()
             self.xmlreport.closeblock()

         #
         # Retrieve configured IP addresses
         #
         self.xmlreport.openblock('network_config')

         # Get the interface name for the IPv4 default gw
         defgw4 = None
         route = open('/proc/net/route')
         try:
             # The first readline() consumes the header line; every further
             # line is read at the top of the loop.  At end of file the
             # empty string splits into a single field and ends the loop.
             rl = route.readline()
             while rl != '' :
                 rl = route.readline()
                 splt = rl.split("\t")
                 # Only catch default route
                 if len(splt) > 2 and splt[2] != '00000000' and splt[1] == '00000000':
                     defgw4 = splt[0]
                     break
         finally:
             # Close the file even if reading fails
             route.close()

         # Make an interface tag for each device found
         if hasattr(ethtool, 'get_interfaces_info'):
             # Using the newer python-ethtool API (version >= 0.4)
             for dev in ethtool.get_interfaces_info(ethtool.get_devices()):
                 if dev.device == 'lo':
                     continue

                 self.xmlreport.openblock('interface',
                                          {'device': dev.device,
                                           'hwaddr': dev.mac_address}
                                          )

                 # Protocol configurations
                 if dev.ipv4_address:
                     self.xmlreport.openblock('IPv4',
                                              {'ipaddr': dev.ipv4_address,
                                               'netmask': dev.ipv4_netmask,
                                               'broadcast': dev.ipv4_broadcast,
                                               'defaultgw': (defgw4 == dev.device) and '1' or '0'}
                                              )
                     self.xmlreport.closeblock()

                 for ip6 in dev.get_ipv6_addresses():
                     self.xmlreport.openblock('IPv6',
                                              {'ipaddr': ip6.address,
                                               'netmask': ip6.netmask,
                                               'scope': ip6.scope}
                                              )
                     self.xmlreport.closeblock()
                 self.xmlreport.closeblock()
         else: # Fall back to older python-ethtool API (version < 0.4)
             ifdevs = ethtool.get_active_devices()
             # The loopback device is not reported.  It may be missing from
             # the active list, in which case remove() would raise
             # ValueError, hence the membership test.
             if 'lo' in ifdevs:
                 ifdevs.remove('lo')
             ifdevs.sort()

             for dev in ifdevs:
                 self.xmlreport.openblock('interface',
                                          {'device': dev,
                                           'hwaddr': ethtool.get_hwaddr(dev)}
                                          )
                 self.xmlreport.openblock('IPv4',
                                          {'ipaddr': ethtool.get_ipaddr(dev),
                                           'netmask': ethtool.get_netmask(dev),
                                           'defaultgw': (defgw4 == dev) and '1' or '0'}
                                          )
                 self.xmlreport.closeblock()
                 self.xmlreport.closeblock()
         self.xmlreport.closeblock()

         self.xmlreport.openblock('loads', {'load_average':str(accum / samples)})
         for load in self.loads:
             load.genxml(self.xmlreport)
         self.xmlreport.closeblock()
         self.cyclictest.genxml(self.xmlreport)

         # now generate the dmidecode data for this host
         d = dmi.DMIinfo(self.config.GetSection('rteval'))
         d.genxml(self.xmlreport)

         # Close the report - prepare for return the result
         self.xmlreport.close()

         # Write XML (or write XSLT parsed XML if xslt != None)
         if self.xml is not None:
             self.xmlreport.Write(self.xml, xslt)
         else:
             # If no file is set, use stdout
             self.xmlreport.Write("-", xslt) # libxml2 defines a filename as "-" to be stdout


     def report(self):
         """Write the current report to stdout, formatted by the configured XSLT template."""
         out = self.xmlreport
         # "-" selects stdout as the destination
         out.Write("-", self.config.xslt_report)

     def XMLreport(self):
         """Return the complete rteval XML report as a libxml2.xmlDoc object."""
         document = self.xmlreport.GetXMLdocument()
         return document

     def show_report(self, xmlfile, xsltfile):
         '''summarize a previously generated xml file'''
         print "Loading %s for summarizing" % xmlfile

         xsltfullpath = os.path.join(self.config.installdir, xsltfile)
         if not os.path.exists(xsltfullpath):
             raise RuntimeError, "can't find XSL template (%s)!" % xsltfullpath

         xmlreport = xmlout.XMLOut('rteval', self.version)
         xmlreport.LoadReport(xmlfile)
         xmlreport.Write('-', xsltfullpath)
         del xmlreport

     def start_loads(self):
         if len(self.loads) == 0:
             raise RuntimeError, "start_loads: No loads defined!"
         self.info ("starting loads:")
         for l in self.loads:
             l.start()
         # now wait until they're all ready
         self.info("waiting for ready from all loads")
         ready=False
         while not ready:
             busy = 0
             for l in self.loads:
                 if not l.isAlive():
                     raise RuntimeError, "%s died" % l.name
                 if not l.isReady():
                     busy += 1
                     self.debug("waiting for %s" % l.name)
             if busy:
                 time.sleep(1.0)
             else:
                 ready = True

     def stop_loads(self):
         if len(self.loads) == 0:
             raise RuntimeError, "stop_loads: No loads defined!"
         self.info("stopping loads: ")
         for l in self.loads:
             self.info("\t%s" % l.name)
             l.stopevent.set()
             l.join(2.0)

     def make_report_dir(self):
         t = self.start
         i = 1
         self.reportdir = os.path.join(self.workdir,
                                       t.strftime("rteval-%Y%m%d-"+str(i)))
         while os.path.exists(self.reportdir):
             i += 1
             self.reportdir = os.path.join(self.workdir,
                                           t.strftime('rteval-%Y%m%d-'+str(i)))
         if not os.path.isdir(self.reportdir):
             os.mkdir(self.reportdir)
             os.mkdir(os.path.join(self.reportdir, "logs"))
         return self.reportdir

     def get_dmesg(self):
         """
         Copy /var/log/dmesg into the report directory as "dmesg".

         If the file does not exist, a message is printed and nothing is
         copied.
         """
         dpath = "/var/log/dmesg"
         if os.path.exists(dpath):
             shutil.copyfile(dpath, os.path.join(self.reportdir, "dmesg"))
             return
         print("dmesg file not found at %s" % dpath)


     def show_remaining_time(self, remaining):
         """
         Print the remaining run time as days, hours, minutes and seconds.

         remaining -- remaining time in seconds; it is truncated to an
                      integer before being split up
         """
         (days, rest) = divmod(int(remaining), 86400)
         (hours, rest) = divmod(rest, 3600)
         (minutes, secs) = divmod(rest, 60)
         print("rteval time remaining: %d days, %d hours, %d minutes, %d seconds" % (days, hours, minutes, secs))


     def measure(self):
         # Collect misc system info
         self.baseos = util.get_base_os()
         self.cputopology = self.get_cpu_topology()
         self.numanodes = util.get_num_nodes()
         self.memsize = util.get_memory_size()
         (self.current_clocksource, self.available_clocksource) = util.get_clocksources()
         self.services = self.get_services()
         self.kthreads = self.get_kthreads()

         onlyload = self.cmd_options.onlyload

         builddir = os.path.join(self.workdir, 'rteval-build')
         if not os.path.isdir(builddir): os.mkdir(builddir)
         self.reportfile = os.path.join(self.reportdir, "summary.rpt")
         self.xml = os.path.join(self.reportdir, "summary.xml")

         # read in loads from the ini file
         self.load_modules = []
         loads = self.config.GetSection("loads")
         for l in loads:
             # hope to eventually have different kinds but module is only on
             # for now (jcw)
             if l[1].lower() == 'module':
                 self.info("importing load module %s" % l[0])
                 self.load_modules.append(__import__(l[0]))

         self.info("setting up loads")
         self.loads = []
         params = {'workdir':self.workdir,
                   'reportdir':self.reportdir,
                   'builddir':builddir,
                   'srcdir':self.config.srcdir,
                   'verbose': self.config.verbose,
                   'debugging': self.config.debugging,
                   'numcores':self.numcores,
                   'logging':self.config.logging,
                   'memsize':self.memsize,
                   'numanodes':self.numanodes,
                   'duration':self.config.duration,
                   }

         for m in self.load_modules:
             self.config.AppendConfig(m.__name__, params)
             self.info("creating load instance for %s" % m.__name__)
             self.loads.append(m.create(self.config.GetSection(m.__name__)))

         if not onlyload:
             self.config.AppendConfig('cyclictest', params)
             self.info("setting up cyclictest")
             self.cyclictest = cyclictest.Cyclictest(params=self.config.GetSection('cyclictest'))

         nthreads = 0
         try:
             # start the loads
             self.start_loads()

             print "rteval run on %s started at %s" % (os.uname()[2], time.asctime())
             print "started %d loads on %d cores" % (len(self.loads), self.numcores),
             if self.numanodes > 1:
                 print " with %d numa nodes" % self.numanodes
             else:
                 print ""
             print "Run duration: %d seconds" % self.config.duration

             start = datetime.now()

             if not onlyload:
                 # start the cyclictest thread
                 self.info("starting cyclictest")
                 self.cyclictest.start()

             # turn loose the loads
             self.info("sending start event to all loads")
             for l in self.loads:
                 l.startevent.set()
                 nthreads += 1

             accum = 0.0
             samples = 0

             report_interval = int(self.config.GetSection('rteval').report_interval)

             # wait for time to expire or thread to die
             signal.signal(signal.SIGINT, sigint_handler)
             signal.signal(signal.SIGTERM, sigterm_handler)
             self.info("waiting for duration (%f)" % self.config.duration)
             stoptime = (time.time() + self.config.duration)
             currtime = time.time()
             rpttime = currtime + report_interval
             loadcount = 5
             while (currtime <= stoptime) and not sigint_received:
                 time.sleep(1.0)
                 if not onlyload and not self.cyclictest.isAlive():
                     raise RuntimeError, "cyclictest thread died!"
                 if len(threading.enumerate()) < nthreads:
                     raise RuntimeError, "load thread died!"
                 if not loadcount:
                     # open the loadavg /proc entry
                     p = open("/proc/loadavg")
                     load = float(p.readline().split()[0])
                     p.close()
                     accum += load
                     samples += 1
                     loadcount = 5
                     #self.debug("current loadavg: %f, running avg: %f (load: %f, samples: %d)" % \
                     #               (load, accum/samples, load, samples))
                 else:
                     loadcount -= 1
                 if currtime >= rpttime:
                     left_to_run = stoptime - currtime
                     self.show_remaining_time(left_to_run)
                     rpttime = currtime + report_interval
                     print "load average: %.2f" % (accum / samples)
                 currtime = time.time()
             self.debug("out of measurement loop")
             signal.signal(signal.SIGINT, signal.SIG_DFL)
             signal.signal(signal.SIGTERM, signal.SIG_DFL)

         except RuntimeError, e:
             print "Runtime error during measurement: %s", e
             raise

         finally:
             if not onlyload:
                 # stop cyclictest
                 self.cyclictest.stopevent.set()

             # stop the loads
             self.stop_loads()

         print "stopping run at %s" % time.asctime()
         if not onlyload:
             # wait for cyclictest to finish calculating stats
             self.cyclictest.finished.wait()
             self.genxml(datetime.now() - start, accum, samples)
             self.report()
             if self.config.sysreport:
                 self.run_sysreport()


     def XMLRPC_Send(self):
         """Sends the report to a given XML-RPC host.

         Returns 0 on success or 2 on submission failure (including the case
         where no XML-RPC host is configured).

         Up to six submissions are tried.  After a socket error the method
         sleeps attempt*5 minutes before trying again.  When self.mailer is
         set, a warning is mailed on the first failure and a final failure or
         recovery message is mailed at the end.  Any exception other than
         socket.error propagates to the caller unchanged.
         """

         if not self.config.xmlrpc:
             return 2

         url = "http://%s/rteval/API1/" % self.config.xmlrpc
         attempt = 0
         exitcode = 2   # Presume failure
         warning_sent = False
         while attempt < 6:
             try:
                 client = rtevalclient.rtevalclient(url)
                 print("Submitting report to %s" % url)
                 rterid = client.SendReport(self.xmlreport.GetXMLdocument())
                 print("Report registered with submission id %i" % rterid)
                 exitcode = 0 # Success
                 break
             except socket.error:
                 if (self.mailer is not None) and (not warning_sent):
                     self.mailer.SendMessage("[RTEVAL:WARNING] Failed to submit report to XML-RPC server",
                                             "Server %s did not respond.  Not giving up yet."
                                             % self.config.xmlrpc)
                     warning_sent = True

                 attempt += 1
                 if attempt > 5:
                     break # To avoid sleeping before we abort

                 print("Failed sending report.  Doing another attempt(%i) " % attempt)
                 time.sleep(attempt*5*60) # Incremental sleep - sleep attempts*5 minutes

         if (self.mailer is not None):
             # Send final result messages
             if exitcode == 2:
                 self.mailer.SendMessage("[RTEVAL:FAILURE] Failed to submit report to XML-RPC server",
                                         "Server %s did not respond at all after %i attempts."
                                         % (self.config.xmlrpc, attempt - 1))
             elif (exitcode == 0) and warning_sent:
                 self.mailer.SendMessage("[RTEVAL:SUCCESS] XML-RPC server available again",
                                         "Succeeded to submit the report to %s in the end."
                                         % (self.config.xmlrpc))
         return exitcode


     def tar_results(self):
         """Pack the report directory into <reportdir>.tar.bz2 beside it.

         Archive members are stored relative to the report directory's parent,
         so they all start with the report directory's own name.  The process
         working directory is not changed.  Archiving is best effort: an I/O
         or tar error is reported on stdout and otherwise ignored.

         Raises RuntimeError if self.reportdir is not an existing directory.
         """
         if not os.path.isdir(self.reportdir):
             raise RuntimeError("no such directory: %s" % self.reportdir)
         import tarfile
         # abspath also drops a trailing separator, so rptdir is never empty
         dirname, rptdir = os.path.split(os.path.abspath(self.reportdir))
         archive = os.path.join(dirname, rptdir + ".tar.bz2")
         try:
             t = tarfile.open(archive, "w:bz2")
             try:
                 t.add(os.path.join(dirname, rptdir), arcname=rptdir)
             finally:
                 t.close()
         except (EnvironmentError, tarfile.TarError):
             print("Failed to create %s: %s" % (archive, sys.exc_info()[1]))

     def summarize(self, file):
         """Show the text summary for a summary XML file or a result archive.

         A name ending in ".tar.bz2" is opened as a tar archive.  The first
         member whose name contains "summary.xml" is copied into a private
         temporary directory, passed to show_report() with the
         rteval_text.xsl stylesheet, and the temporary directory is removed
         again even if show_report() fails.  Any other name is passed to
         show_report() directly.

         When the archive cannot be opened or holds no usable summary.xml a
         message is printed and nothing is shown.
         """
         if not file.endswith(".tar.bz2"):
             self.show_report(file, 'rteval_text.xsl')
             return

         import tarfile
         try:
             t = tarfile.open(file)
         except Exception:
             print("Don't know how to summarize %s (tarfile open failed)" % file)
             return

         tmpdir = None
         try:
             element = None
             for f in t.getnames():
                 if 'summary.xml' in f:
                     element = f
                     break
             # extractfile() yields None for members that are not regular files
             member = None
             if element is not None:
                 member = t.extractfile(element)
             if member is None:
                 print("No summary.xml found in tar archive %s" % file)
                 return
             self.debug("extracting %s from %s for summarizing" % (element, file))
             # copy the content under a fixed name so that member names taken
             # from the archive never decide where the file is written
             tmpdir = tempfile.mkdtemp(prefix="rteval-")
             summary = os.path.join(tmpdir, "summary.xml")
             out = open(summary, "wb")
             try:
                 shutil.copyfileobj(member, out)
             finally:
                 out.close()
             self.show_report(summary, 'rteval_text.xsl')
         finally:
             t.close()
             if tmpdir is not None:
                 shutil.rmtree(tmpdir, ignore_errors=True)

     def rteval(self):
         ''' main function for rteval

         With --summarize or --rawhistogram the files named on the command
         line are rendered and the process exits with status 0.  Otherwise
         the caller must be root: the report directory is created, the
         measurement is run, the report is submitted over XML-RPC when a host
         is configured, and dmesg plus a tarball of the results are saved.

         Returns the XMLRPC_Send() result when a submission was attempted and
         0 otherwise.  Raises RuntimeError when no file is given to
         summarize or when the work directory does not exist.
         '''
         retval = 0

         # Parse initial DMI decoding errors
         dmi.ProcessWarnings()

         # if --summarize was specified then just parse the XML, print it and exit
         if self.cmd_options.summarize or self.cmd_options.rawhistogram:
             if len(self.cmd_arguments) < 1:
                 raise RuntimeError("Must specify at least one XML file with --summarize!")

             for x in self.cmd_arguments:
                 if self.cmd_options.summarize:
                     self.summarize(x)
                 elif self.cmd_options.rawhistogram:
                     self.show_report(x, 'rteval_histogram_raw.xsl')

             sys.exit(0)

         if os.getuid() != 0:
             print("Must be root to run rteval!")
             sys.exit(-1)

         self.debug('''rteval options:
         workdir: %s
         loaddir: %s
         reportdir: %s
         verbose: %s
         debugging: %s
         logging:  %s
         duration: %f
         sysreport: %s
         inifile:  %s''' % (self.workdir, self.config.srcdir, self.reportdir, self.config.verbose,
                            self.config.debugging, self.config.logging, self.config.duration,
                            self.config.sysreport, self.inifile))

         if not os.path.isdir(self.workdir):
             # workdir is a path string, so it must be formatted with %s
             raise RuntimeError("work directory %s does not exist" % self.workdir)

         # create our report directory
         try:
             self.make_report_dir()
         except Exception:
             print("Cannot create the report dir!")
             print("(is this an NFS filesystem with rootsquash turned on?)")
             sys.exit(-1)

         self.measure()

         # if --xmlrpc-submit | -X was given, send our report to this host
         if self.config.xmlrpc:
             retval = self.XMLRPC_Send()

         self.get_dmesg()
         self.tar_results()

         return retval

 # Script entry point: build an RtEval from the command line arguments, run
 # it and exit with the code it returns.
 if __name__ == '__main__':
     # NOTE(review): pwd and grp are not referenced in the visible part of
     # this block - confirm whether they are still needed
     import pwd, grp

     try:
         # Parse initial DMI decoding errors
         dmi.ProcessWarnings()

         rteval = RtEval(sys.argv[1:])
         ec = rteval.rteval()
         rteval.debug("exiting with exit code: %d" % ec)
         sys.exit(ec)
     except KeyboardInterrupt:
         # an interrupt from the keyboard ends the program with status 0
         sys.exit(0)