[netperf-dev] netperf2 commit notice r596 - trunk/doc/examples

raj at netperf.org raj at netperf.org
Mon Jun 25 13:43:59 PDT 2012


Author: raj
Date: 2012-06-25 13:43:59 -0700 (Mon, 25 Jun 2012)
New Revision: 596

Added:
   trunk/doc/examples/post_proc.py
Modified:
   trunk/doc/examples/post_proc.sh
Log:
initial check-in of python-based post-processing (post_proc.py) for the runemomniaggdemo.sh script

Added: trunk/doc/examples/post_proc.py
===================================================================
--- trunk/doc/examples/post_proc.py	                        (rev 0)
+++ trunk/doc/examples/post_proc.py	2012-06-25 20:43:59 UTC (rev 596)
@@ -0,0 +1,352 @@
+#!/usr/bin/python
+
+# This is a re-writing of post_proc.sh into Python.  Feel free to
+# provide feedback on how to make it better - either better
+# post-processing or better Python.  Keep in mind it is only the
+# second Python script I have ever written :) raj 2012-06-25
+
+# This script runs much faster than post_proc.sh does.  Some of
+# that may be Python versus bash+awk+grep+rrdtool, but much of it is
+# undoubtedly from better algorithms - not going through the data
+# umpteen times.  For example, for a test which had up to 8 netperfs
+# running and so 8 files to be post-processed:
+#
+#raj at tardy:~/netperf2_trunk/doc/examples$ time ./post_proc.py netperf_tps.log
+#Prefix is netperf_tps
+#Average of peak interval is 581554.430 Trans/s from 1340326282 to 1340326404
+#Minimum of peak interval is 464912.610 Trans/s from 1340326282 to 1340326404
+#Maximum of peak interval is 594025.670 Trans/s from 1340326282 to 1340326404
+#
+#real	0m0.450s
+#user	0m0.430s
+#sys	0m0.010s
+#raj at tardy:~/netperf2_trunk/doc/examples$ time ./post_proc.sh netperf_tps.log
+#Prefix is netperf_tps
+#Performing overall summary computations
+#Average of peak interval is 581460 Trans/s from 1340326283 to 1340326404
+#Minimum of peak interval is 464913 Trans/s from 1340326283 to 1340326404
+#Maximum of peak interval is 594026 Trans/s from 1340326283 to 1340326404
+
+import os
+import sys
+import glob
+import math
+import rrdtool
+
+def find_vrules(source):
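+    # walk the runemomniaggdemo.sh log looking for the lines which
+    # mark changes in offered load, turning each into an rrdtool
+    # VRULE for the graphs and keeping track of the start and end
+    # of each load interval and of the run as a whole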
+    vrules = []
+    interval_times = []
+    interval_start = 0
+    interval_end = 0
+    netperf_count=0
+    start_time=0.0
+    end_time=0.0
+    RED="FF0000"
+    BLACK="000000"
+    resumes=False
+
+    for line in source:
+        if "Starting netperfs on" in line:
+            netperf_count += 1
+        elif "Pausing" in line:
+            fields = line.split()
+            plural=''
+            if netperf_count > 1:
+                plural = 's'
+            vrule = 'VRULE:%d#%s:%d netperf%s running' % (int(float(fields[5])),RED,int(fields[7]),plural)
+            vrules.append(vrule)
+            interval_start = int(float(fields[5]))
+        elif "Resuming" in line:
+            fields = line.split()
+            if resumes:
+                resume_text=''
+            else:
+                resume_text='Resuming ramp'
+
+            vrule = "VRULE:%d#%s:%s" % (int(float(fields[2])),BLACK,resume_text)
+            vrules.append(vrule)
+            interval_end = int(float(fields[2]))
+            interval_times.append((interval_start,interval_end))
+            resumes=True
+        elif "Starting netperfs at" in line:
+            start_time = line.split()[3]
+        elif "Netperfs started" in line:
+            fields = line.split()
+            vrule = 'VRULE:%d#%s:All %d netperfs running' % (int(float(fields[3])),RED,netperf_count)
+            vrules.append(vrule)
+            interval_start = int(float(fields[3]))
+        elif "Netperfs stopping" in line:
+            fields = line.split()
+            vrule = 'VRULE:%d#%s:Rampdown started' % (int(float(fields[2]))-1,BLACK)
+            vrules.append(vrule)
+            interval_end = int(float(fields[2]))-1
+            interval_times.append((interval_start,interval_end))
+        elif "Netperfs stopped" in line:
+            end_time = line.split()[2]
+
+    return vrules,float(start_time),float(end_time),interval_times
+
+def open_rrd(basename,start_time,end_time,max_interval):
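+    # create a one second step RRD covering the full run, with a
+    # single GAUGE data source whose heartbeat is max_interval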
+#    print "Opening %s.rrd with start time %d and end time %d" % (basename,int(start_time),int(end_time))
+
+    data_sources = [ 'DS:mbps:GAUGE:%d:U:U' % max_interval ]
+    rra = [ 'RRA:AVERAGE:0.5:1:%d' % ((int(end_time) - int(start_time)) + 1) ]
+
+    rrdtool.create(basename + ".rrd",
+                   '--step', '1',
+                   '--start', str(int(start_time)-1),
+                   data_sources,
+                   rra )
+
+def update_heartbeat(basename,heartbeat):
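+    # widen the heartbeat on the data source so rrdtool does not
+    # declare slower-arriving interim results unknown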
+    print "Updating heartbeat with %d" % heartbeat
+    rrdtool.tune(basename + ".rrd",
+                 '--heartbeat', 'mbps:%d' % heartbeat)
+
+
+def update_rrd(basename,value,timestamp):
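+    # dribble a single timestamped sample into this instance's RRD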
+    rrdtool.update(basename + '.rrd',
+                   '%.3f:%f' % (timestamp, value))
+
+def add_to_ksink(basename,start_time,end_time,ksink):
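+    # fetch the averaged per-second results for this instance and
+    # accumulate them into ksink, keyed by timestamp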
+    ((first, last, step),name,results) = rrdtool.fetch(basename + ".rrd",
+                                                       'AVERAGE',
+                                                       '--start', str(int(start_time)),
+                                                       '--end', str(int(end_time)))
+#    print "First %d last %d step %d results %d" % (first, last, step, len(results))
+    for key,result in enumerate(results,first):
+        if result[0] and key in ksink:
+            ksink[key] += float(result[0])
+        else:
+            if result[0]:
+                print "Key %d not in ksink" % key
+
+def process_result(basename, raw_results, start_time,end_time, ksink):
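+    # walk the interim results for one netperf instance and push
+    # each complete result into that instance's RRD.  results may
+    # arrive in human-readable form, e.g. something like
+    #  Interim result: 598.55 10^6bits/s over 1.000 seconds ending at 1340326282.123
+    # or as keyval NETPERF_* lines, or as four-field CSV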
+    first_result = True
+    have_result = False
+    interim_result=0.0
+    interim_units="Trans/s"
+    interim_interval=1.0
+    interim_end=0.0
+    first_timestamp=0.0
+    max_interval=1
+
+    for raw_result in raw_results:
+        if "Interim result:" in raw_result:
+            # human format
+            fields = raw_result.split()
+            interim_result=float(fields[2])
+            interim_units=fields[3]
+            interim_interval=float(fields[5])
+            interim_end=float(fields[9])
+            have_result=True
+        elif "NETPERF_INTERIM_RESULT" in raw_result:
+            # keyval
+            interim_result=float(raw_result.split('=')[1])
+            have_result=False
+        elif "NETPERF_UNITS" in raw_result:
+            # keyval
+            interim_units=raw_result.split('=')[1]
+            have_result=False
+        elif "NETPERF_INTERVAL" in raw_result:
+            # keyval
+            interim_interval=float(raw_result.split('=')[1])
+            have_result=False
+        elif "NETPERF_ENDING" in raw_result:
+            # keyval
+            interim_end=float(raw_result.split('=')[1])
+            have_result=True
+        else:
+            # csv
+            fields = raw_result.split(',')
+            if len(fields) == 4:
+                interim_result = float(fields[0])
+                interim_units = fields[1]
+                interim_interval = float(fields[2])
+                interim_end = float(fields[3])
+                have_result = True
+            else:
+                have_result = False
+                
+        # only act once we have a complete result in hand - in
+        # keyval format the first line or two of a record will not
+        # yet have given us a valid ending timestamp
+        if have_result:
+            if first_result:
+                # we could use the overall start time, but using the
+                # first timestamp for this instance may save us some
+                # space in the rrdfile
+                open_rrd(basename,interim_end,end_time,max_interval)
+                first_timestamp = interim_end
+                first_result = False
+
+            if int(math.ceil(interim_interval)) > max_interval:
+                max_interval = int(math.ceil(interim_interval))
+                update_heartbeat(basename,max_interval)
+
+            # perhaps one of these days, once we know that the
+            # rrdtool bits can handle it, we will build a big list of
+            # results and feed them en masse. until then we will
+            # dribble them one at a time
+            update_rrd(basename,interim_result,interim_end)
+            have_result = False
+
+    last_timestamp = interim_end
+#    print "First timestamp for this instance %f last %f" % (first_timestamp,last_timestamp)
+    return first_timestamp, last_timestamp
+
+def process_result_files(prefix,start_time,end_time,ksink):
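+    # run every <prefix>*.out file through process_result and fold
+    # the per-instance results into ksink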
+    print "Prefix is",prefix
+    
+    results_list = glob.glob(prefix+"*.out")
+
+    for result_file in results_list:
+        basename = result_file.replace(".out","")
+        raw_results = open(result_file,"r")
+        first_timestamp, last_timestamp = process_result(basename,
+                                                         raw_results,
+                                                         start_time,
+                                                         end_time,
+                                                         ksink)
+        # OK, now we get the massaged results
+        add_to_ksink(basename,first_timestamp,last_timestamp,ksink)
+
+def generate_overall(prefix,start_time,end_time,ksink):
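+    # the summed results across all the netperf instances go into
+    # an RRD of their own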
+    overall = prefix + "_overall"
+    open_rrd(overall,start_time,end_time,1)
+
+    # a dict makes no promise to iterate in key order, and rrdtool
+    # insists on monotonically increasing timestamps, so sort the
+    # keys rather than rely on luck
+    for key in sorted(ksink):
+        update_rrd(overall,ksink[key],key)
+
+def overall_min_max_avg(prefix,start_time,end_time,intervals):
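+    # compute the average, minimum and maximum over each load
+    # interval and remember the interval with the highest average -
+    # the peak interval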
+
+    peak_interval_id = 0
+    max_average = 0.0
+    max_minimum = 0.0
+    max_maximum = 0.0
+    length = int(end_time) - int(start_time)
+
+    rrdtool.create(prefix + "_intervals.rrd",
+                   '--step', '1',
+                   '--start', str(int(start_time) - 1),
+                   'DS:avg:GAUGE:1:U:U', 'RRA:AVERAGE:0.5:1:%d' % int(length),
+                   'DS:min:GAUGE:1:U:U', 'RRA:AVERAGE:0.5:1:%d' % int(length),
+                   'DS:max:GAUGE:1:U:U', 'RRA:AVERAGE:0.5:1:%d' % int(length))
+
+    for interval_id, interval in enumerate(intervals,start=1):
+        start = interval[0] + 1
+        end = interval[1] - 1
+        # we have no interest in the size of the graph (the first two
+        # items in the list) so slice just the part of interest
+        result = rrdtool.graph('/dev/null',
+                               '--start', str(start),
+                               '--end', str(end),
+                               'DEF:foo=%s_overall.rrd:mbps:AVERAGE' % prefix,
+                               'VDEF:avg=foo,AVERAGE',
+                               'VDEF:min=foo,MINIMUM',
+                               'VDEF:max=foo,MAXIMUM',
+                               'PRINT:avg:"%6.2lf"',
+                               'PRINT:min:"%6.2lf"',
+                               'PRINT:max:"%6.2lf"')[2]
+#        print "from %d to %d is %s" % (start,end,result)
+        iavg = float(result[0].strip('"'))
+        imin = float(result[1].strip('"'))
+        imax = float(result[2].strip('"'))
+        for time in xrange(start,end+1):
+            rrdtool.update(prefix + "_intervals.rrd",
+                           '%d:%f:%f:%f' % (time, iavg, imin, imax))
+        if iavg > max_average:
+            peak_interval_id = interval_id
+            max_average = iavg
+            max_minimum = imin
+            max_maximum = imax
+
+    return peak_interval_id, max_average, max_minimum, max_maximum
+
+def units_et_al_by_prefix(prefix):
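+    # infer the units, scaling multiplier and traffic direction
+    # from the test prefix - "pps" and "tps" tests report Trans/s
+    # and need no scaling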
+    units = "bits/s"
+    multiplier = "1000000"
+    direction = "Bidirectional"
+
+    if ("pps" in prefix) or ("tps" in prefix):
+        units = "Trans/s"
+        multiplier = "1"
+    elif "inbound" in prefix:
+        direction = "Inbound"
+    elif "outbound" in prefix:
+        direction = "Outbound"
+
+    return units, multiplier, direction
+
+def graph_overall(prefix,start_time,end_time,vrules,peak_interval_id=None,peak_average=0.0):
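+    # emit a PNG of the overall results, annotated with the vrules
+    # and, when we have one, a line showing the interval averages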
+
+    length = int(end_time) - int(start_time)
+
+    units, multiplier, direction = units_et_al_by_prefix(prefix)
+
+#    print units,multiplier,direction
+#    print "Vrules",vrules
+
+    interval_specs = []
+    if peak_interval_id:
+        interval_specs = [ 'DEF:bar=%s_intervals.rrd:avg:AVERAGE' % prefix,
+                           'CDEF:intvl=bar,%s,*' % multiplier,
+                           'LINE2:intvl#0F0F0F40:Interval average. Peak of %.3f during interval %d' % (peak_average, peak_interval_id) ]
+
+    rrdtool.graph(prefix + "_overall.png", '--imgformat', 'PNG',
+                  '--start', str(int(start_time)),
+                  '--end', str(int(end_time)),
+                  '-w','%d' % max(800,length),'-h','400',
+                  vrules,
+                  '--font', 'DEFAULT:0:Helvetica',
+                  '-t', 'Overall %s' % prefix,
+                  '-v', '%s %s' % (direction,units),
+                  'DEF:foo=%s_overall.rrd:mbps:AVERAGE' % prefix,
+                  'CDEF:bits=foo,%s,*' % multiplier,
+                  'LINE2:bits#00FF0080:%s' % units,
+                  interval_specs)
+
+def graph_individual(prefix,start_time,end_time,vrules):
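+    # emit one PNG per individual netperf instance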
+
+    units, multiplier, direction = units_et_al_by_prefix(prefix)
+
+    length = int(end_time) - int(start_time)
+
+    for individual in glob.glob(prefix+"*.out"):
+        basename = individual.replace(".out","")
+        rrdtool.graph(basename + ".png",
+                      '--imgformat','PNG',
+                      '--start', str(int(start_time)),
+                      '--end', str(int(end_time)),
+                      '--font',  'DEFAULT:0:Helvetica',
+                      '-w', '%d' % max(800,length), '-h', '400',
+                      vrules,
+                      '-t', '%s %s' % (basename,prefix),
+                      '-v', '%s %s' % (direction, units),
+                      'DEF:foo=%s.rrd:mbps:AVERAGE' % basename,
+                      'CDEF:bits=foo,%s,*' % multiplier,
+                      'LINE2:bits#00FF0080:%s' % units)
+
+if __name__ == '__main__':
+
+    if len(sys.argv) < 2:
+        print "Usage: %s <logfile>" % sys.argv[0]
+        sys.exit(1)
+
+    filename = sys.argv[1]
+    prefix = filename.replace(".log","")
+    source = open(filename,"r")
+    vrules,start_time,end_time,intervals = find_vrules(source)
+    #print vrules
+
+    # pre-populate ksink with a zero for every second of the run; a
+    # collections.defaultdict(float) could add keys on the fly, but
+    # then generate_overall would see only the timestamps actually
+    # updated
+    length = int(end_time + 1) - int(start_time)
+    ksink=dict(zip(xrange(int(start_time),
+                          int(end_time)+1),
+                   [0.0] * length))
+
+    process_result_files(prefix,start_time,end_time,ksink)
+    generate_overall(prefix,start_time,end_time,ksink)
+    peak_interval_id, peak_average, peak_minimum, peak_maximum = overall_min_max_avg(prefix,start_time,end_time,intervals)
+    graph_overall(prefix,start_time,end_time,vrules,peak_interval_id,peak_average)
+    graph_individual(prefix,start_time,end_time,vrules)
+    
+    # we only need the units
+    units = units_et_al_by_prefix(prefix)[0]
+    peak_start, peak_end = intervals[peak_interval_id-1]
+    print "Average of peak interval is %.3f %s from %d to %d" % (peak_average, units, peak_start, peak_end)
+    print "Minimum of peak interval is %.3f %s from %d to %d" % (peak_minimum, units, peak_start, peak_end)
+    print "Maximum of peak interval is %.3f %s from %d to %d" % (peak_maximum, units, peak_start, peak_end)


Property changes on: trunk/doc/examples/post_proc.py
___________________________________________________________________
Added: svn:executable
   + *

Modified: trunk/doc/examples/post_proc.sh
===================================================================
--- trunk/doc/examples/post_proc.sh	2012-06-20 22:41:48 UTC (rev 595)
+++ trunk/doc/examples/post_proc.sh	2012-06-25 20:43:59 UTC (rev 596)
@@ -29,7 +29,7 @@
 
     . ./minsn
 
-# echo "MAX_INTERVAL $MAX_INTERVAL MIN_TIMESTAMP $MIN_TIMESTAMP MAX_TIMESTAMP $MAX_TIMESTAMP"
+#echo "MAX_INTERVAL $MAX_INTERVAL MIN_TIMESTAMP $MIN_TIMESTAMP MAX_TIMESTAMP $MAX_TIMESTAMP"
     LENGTH=`expr $MAX_TIMESTAMP - $MIN_TIMESTAMP`
     SIZE="-w $LENGTH -h 400"
 
@@ -78,7 +78,7 @@
 # have to use MAX_INTERVAL
 
 rrdtool create ${prefix}_overall.rrd --step 1 --start `expr $MIN_TIMESTAMP - 1` \
-    DS:mbps:GAUGE:2:U:U RRA:AVERAGE:0.5:1:$LENGTH
+    DS:mbps:GAUGE:1:U:U RRA:AVERAGE:0.5:1:$LENGTH
 
 for i in `seq $MIN_TIMESTAMP $MAX_TIMESTAMP`
 do


