[netperf-dev] netperf2 commit notice r65 - in trunk: . src

raj at netperf.org raj at netperf.org
Mon Apr 3 18:09:55 PDT 2006


Author: raj
Date: 2006-04-03 18:09:54 -0700 (Mon, 03 Apr 2006)
New Revision: 65

Modified:
   trunk/AUTHORS
   trunk/Release_Notes
   trunk/config.h.in
   trunk/configure
   trunk/configure.ac
   trunk/src/netcpu_looper.c
   trunk/src/netlib.c
   trunk/src/netlib.h
   trunk/src/netsh.c
Log:
Take in a patch from Bret McKee which makes netcpu_looper actually
compile now :)  Using that as a starting point, modify netcpu_looper
to use the CPU affinity routine in netlib.c, which has been extended
to know whether or not a "cpu map" should be consulted to convert
the processor_affinity passed in to bind_to_specific_processor.  This is
used by the call made by netcpu_looper.c because on some systems
the CPU id space may not start from zero and be contiguous.  This
mapping is not necessary for the netperf/netserver CPU affinity
because we ass-u-me the user will have provided suitable CPU id's
in the -T option.


Modified: trunk/AUTHORS
===================================================================
--- trunk/AUTHORS	2006-03-31 21:27:06 UTC (rev 64)
+++ trunk/AUTHORS	2006-04-04 01:09:54 UTC (rev 65)
@@ -170,4 +170,8 @@
 Changes to retrieve CPU util on MacOS X.
 
 Dickon Reed
-Patches to attend to some windows in TCP_CRR and TCP_CC under Windows
\ No newline at end of file
+Patches to attend to some windows in TCP_CRR and TCP_CC under Windows
+
+Bret McKee <bret at hp.com>
+Fixes to get netcpu_looper compiling and working after the "netcpu"
+split

Modified: trunk/Release_Notes
===================================================================
--- trunk/Release_Notes	2006-03-31 21:27:06 UTC (rev 64)
+++ trunk/Release_Notes	2006-04-04 01:09:54 UTC (rev 65)
@@ -13,6 +13,16 @@
 *) Fixes for test lockups with TCP_CRR and TCP_CC under Windows
    courtesy of Dikon Reed.
 
+*) Fixes to netcpu_looper.c to get it to actually compile :)
+
+*) Have netcpu_looper use the bind_to_specific_processor() call
+   provided by netlib since that knows about more platforms than the
+   code in netcpu_looper did. The looper CPU binding will use a
+   mapping to handle cases where the CPU id's on the system may not be
+   a contiguous space starting from zero.  At present, the code that
+   sets up the mapping only knows about retrieving actual CPU ids under
+   HP-UX.
+
 These are the Release Notes for Revision 2.4.1 of netperf:
 
 Things changed in this release:

Modified: trunk/config.h.in
===================================================================
--- trunk/config.h.in	2006-03-31 21:27:06 UTC (rev 64)
+++ trunk/config.h.in	2006-04-04 01:09:54 UTC (rev 65)
@@ -18,6 +18,9 @@
 /* Define to 1 if you have the <endian.h> header file. */
 #undef HAVE_ENDIAN_H
 
+/* Define to 1 if you have the <errno.h> header file. */
+#undef HAVE_ERRNO_H
+
 /* Define to 1 if you have the <fcntl.h> header file. */
 #undef HAVE_FCNTL_H
 
@@ -126,6 +129,9 @@
 /* Define to 1 if you have the `sendfile' function. */
 #undef HAVE_SENDFILE
 
+/* Define to 1 if you have the <signal.h> header file. */
+#undef HAVE_SIGNAL_H
+
 /* Define to 1 if you have the `socket' function. */
 #undef HAVE_SOCKET
 
@@ -165,6 +171,9 @@
 /* Define to 1 if you have the <sys/ioctl.h> header file. */
 #undef HAVE_SYS_IOCTL_H
 
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#undef HAVE_SYS_MMAN_H
+
 /* Define to 1 if you have the <sys/param.h> header file. */
 #undef HAVE_SYS_PARAM_H
 

Modified: trunk/configure
===================================================================
--- trunk/configure	2006-03-31 21:27:06 UTC (rev 64)
+++ trunk/configure	2006-04-04 01:09:54 UTC (rev 65)
@@ -3663,7 +3663,10 @@
 
 
 
-for ac_header in arpa/inet.h endian.h fcntl.h limits.h malloc.h netdb.h netinet/in.h stdlib.h string.h strings.h sys/ioctl.h sys/param.h sys/socket.h sys/time.h unistd.h
+
+
+
+for ac_header in arpa/inet.h endian.h errno.h fcntl.h limits.h malloc.h netdb.h netinet/in.h signal.h stdlib.h string.h strings.h sys/ioctl.h sys/mman.h sys/param.h sys/socket.h sys/time.h unistd.h
 do
 as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
 if eval "test \"\${$as_ac_Header+set}\" = set"; then

Modified: trunk/configure.ac
===================================================================
--- trunk/configure.ac	2006-03-31 21:27:06 UTC (rev 64)
+++ trunk/configure.ac	2006-04-04 01:09:54 UTC (rev 65)
@@ -27,7 +27,7 @@
 # Checks for header files.
 AC_HEADER_STDC
 AC_HEADER_SYS_WAIT
-AC_CHECK_HEADERS([arpa/inet.h endian.h fcntl.h limits.h malloc.h netdb.h netinet/in.h stdlib.h string.h strings.h sys/ioctl.h sys/param.h sys/socket.h sys/time.h unistd.h])
+AC_CHECK_HEADERS([arpa/inet.h endian.h errno.h fcntl.h limits.h malloc.h netdb.h netinet/in.h signal.h stdlib.h string.h strings.h sys/ioctl.h sys/mman.h sys/param.h sys/socket.h sys/time.h unistd.h])
 
 # Some platforms require these.  There may be a better way.
 AC_HAVE_LIBRARY(socket)

Modified: trunk/src/netcpu_looper.c
===================================================================
--- trunk/src/netcpu_looper.c	2006-03-31 21:27:06 UTC (rev 64)
+++ trunk/src/netcpu_looper.c	2006-04-04 01:09:54 UTC (rev 65)
@@ -21,7 +21,7 @@
 #if HAVE_UNISTD_H
 # include <unistd.h>
 #endif
-#if HAVE_MMAP
+#if defined(HAVE_MMAP) || defined(HAVE_SYS_MMAN_H)
 # include <sys/mman.h>
 #else
 # error netcpu_looper requires mmap
@@ -46,20 +46,41 @@
 # include <sys/wait.h>
 #endif
 
+#ifdef HAVE_SIGNAL_H
 #include <signal.h>
+#endif
+
+#ifdef HAVE_ERRNO_H
 #include <errno.h>
+#endif
 
 #include "netsh.h"
 #include "netlib.h"
 
 #define PAGES_PER_CHILD 2
 
+/* the lib_start_count and lib_end_count arrays hold the starting
+   and ending values of whatever is counting when the system is
+   idle. The rate at which this increments during a test is compared
+   with a previous calibration to arrive at a CPU utilization
+   percentage. raj 2005-01-26 */
+static uint64_t  lib_start_count[MAXCPUS];
+static uint64_t  lib_end_count[MAXCPUS];
+
+static int *cpu_mappings;
+
 static int lib_idle_fd;
 static uint64_t *lib_idle_address[MAXCPUS];
 static long     *lib_base_pointer;
 static pid_t     lib_idle_pids[MAXCPUS];
 static int       lib_loopers_running=0;
 
+/* we used to use this code to bind the loopers, but since we have
+   decided to enable processor affinity for the actual
+   netperf/netserver processes we will use that affinity routine,
+   which happens to know about more systems than this */
+
+#ifdef NOTDEF
 static void
 bind_to_processor(int child_num)
 {
@@ -143,6 +164,7 @@
 #endif /* __sun && _SVR4 */
 #endif /* __hpux */
 }
+#endif
 
  /* sit_and_spin will just spin about incrementing a value */
  /* this value will either be in a memory mapped region on Unix shared */
@@ -155,7 +177,7 @@
 sit_and_spin(int child_index)
 
 {
-  long *my_counter_ptr;
+  uint64_t *my_counter_ptr;
 
  /* only use C stuff if we are not WIN32 unless and until we */
  /* switch from CreateThread to _beginthread. raj 1/96 */
@@ -173,7 +195,7 @@
 #endif /* WIN32 */
 
   /* reset our base pointer to be at the appropriate offset */
-  my_counter_ptr = (long *) ((char *)lib_base_pointer + 
+  my_counter_ptr = (uint64_t *) ((char *)lib_base_pointer + 
                              (netlib_get_page_size() * 
                               PAGES_PER_CHILD * child_index));
   
@@ -186,12 +208,12 @@
   /* area more readable. I'll probably do the same thine with the */
   /* "low pri code" raj 10/95 */
   
-  /* NOTE. I do *NOT* think it would be appropriate for the actual */
-  /* test processes to be bound to a  particular processor - that */
-  /* is something that should be left up to the operating system. */
+  /* since we are "flying blind" wrt where we should bind the looper
+     processes, we want to use the cpu_map that was prepared by netlib
+     rather than assume that the CPU ids on the system start at zero
+     and are contiguous. raj 2006-04-03 */
+  bind_to_specific_processor(child_index % lib_num_loc_cpus,1);
   
-  bind_to_processor(child_index);
-  
   for (*my_counter_ptr = 0L;
        ;
        (*my_counter_ptr)++) {
@@ -338,6 +360,7 @@
       /* we are the child. we could decide to exec some separate */
       /* program, but that doesn't really seem worthwhile - raj 4/95 */
 
+      signal(SIGTERM, SIG_DFL);
       sit_and_spin(i);
 
       /* we should never really get here, but if we do, just exit(0) */
@@ -345,7 +368,7 @@
       break;
     default:
       /* we must be the parent */
-      lib_idle_address[i] = (long *) ((char *)lib_base_pointer + 
+      lib_idle_address[i] = (uint64_t *) ((char *)lib_base_pointer + 
                                       (netlib_get_page_size() * 
                                        PAGES_PER_CHILD * i));
       if (debug) {
@@ -461,7 +484,7 @@
 calibrate_idle_rate (int iterations, int interval)
 {
 
-  long  
+  uint64_t
     firstcnt[MAXCPUS],
     secondcnt[MAXCPUS];
 
@@ -520,11 +543,11 @@
         fprintf(where,
                 "\tfirstcnt[%d] = 0x%8.8lx%8.8lx secondcnt[%d] = 0x%8.8lx%8.8lx\n",
                 j,
-                firstcnt[j],
-                firstcnt[j],
+                (uint32_t)(firstcnt[j]>>32),
+                (uint32_t)(firstcnt[j]&0xffffffff),
                 j,
-                secondcnt[j],
-                secondcnt[j]);
+                (uint32_t)(secondcnt[j]>>32),
+                (uint32_t)(secondcnt[j]&0xffffffff));
       }
       /* we assume that it would wrap no more than once. we also */
       /* assume that the result of subtracting will "fit" raj 4/95 */
@@ -558,19 +581,76 @@
 
 }
 
-/* take the initial timestamp and start collecting CPU utilization if
-   requested */
+float
+calc_cpu_util_internal(float elapsed_time)
+{
+  int i;
+  float correction_factor;
+  float actual_rate;
 
+  lib_local_cpu_util = (float)0.0;
+  /* It is possible that the library measured a time other than */
+  /* the one that the user wants for the cpu utilization */
+  /* calculations - for example, tests that were ended by */
+  /* watchdog timers such as the udp stream test. We let these */
+  /* tests tell us what the elapsed time should be. */
+  
+  if (elapsed_time != 0.0) {
+    correction_factor = (float) 1.0 + 
+      ((lib_elapsed - elapsed_time) / elapsed_time);
+  }
+  else {
+    correction_factor = (float) 1.0;
+  }
+
+  for (i = 0; i < lib_num_loc_cpus; i++) {
+
+    /* it would appear that on some systems, in loopback, nice is
+     *very* effective, causing the looper process to stop dead in its
+     tracks. if this happens, we need to ensure that the calculation
+     does not go south. raj 6/95 and if we run completely out of idle,
+     the same thing could in theory happen to the USE_KSTAT path. raj
+     8/2000 */ 
+    
+    if (lib_end_count[i] == lib_start_count[i]) {
+      lib_end_count[i]++;
+    }
+    
+    actual_rate = (lib_end_count[i] > lib_start_count[i]) ?
+      (float)(lib_end_count[i] - lib_start_count[i])/lib_elapsed :
+      (float)(lib_end_count[i] - lib_start_count[i] +
+	      MAXLONG)/ lib_elapsed;
+    if (debug) {
+      fprintf(where,
+              "calc_cpu_util: actual_rate on processor %d is %f start 0x%8.8lx%8.8lx end 0x%8.8lx%8.8lx\n",
+              i,
+              actual_rate,
+              (uint32_t)(lib_start_count[i]>>32),
+              (uint32_t)(lib_start_count[i]&0xffffffff),
+              (uint32_t)(lib_end_count[i]>>32),
+              (uint32_t)(lib_end_count[i]&0xffffffff));
+    }
+    lib_local_per_cpu_util[i] = (lib_local_maxrate - actual_rate) /
+      lib_local_maxrate * 100;
+    lib_local_cpu_util += lib_local_per_cpu_util[i];
+  }
+  /* we want the average across all n processors */
+  lib_local_cpu_util /= (float)lib_num_loc_cpus;
+  
+  lib_local_cpu_util *= correction_factor;
+  return lib_local_cpu_util;
+
+
+}
 void
-measure_cpu_start()
+cpu_start_internal(void)
 {
-  cpu_method = PROC_STAT;
   get_cpu_idle(lib_start_count);
+  return;
 }
 
-/* collect final CPU utilization raw data */
 void
-measure_cpu_stop()
+cpu_stop_internal(void)
 {
   get_cpu_idle(lib_end_count);
 }

Modified: trunk/src/netlib.c
===================================================================
--- trunk/src/netlib.c	2006-03-31 21:27:06 UTC (rev 64)
+++ trunk/src/netlib.c	2006-04-04 01:09:54 UTC (rev 65)
@@ -1,5 +1,5 @@
 char    netlib_id[]="\
-@(#)netlib.c (c) Copyright 1993-2004 Hewlett-Packard Company. Version 2.3pl2";
+@(#)netlib.c (c) Copyright 1993-2006 Hewlett-Packard Company. Version 2.4.2";
 
 /****************************************************************/
 /*                                                              */
@@ -143,6 +143,10 @@
 #endif /* __osf__ */
 #endif /* WANT_DLPI */
 
+#ifdef HAVE_MPCTL
+#include <sys/mpctl.h>
+#endif
+
 #if !defined(HAVE_GETADDRINFO) || !defined(HAVE_GETNAMEINFO)
 # include "missing/getaddrinfo.h"
 #endif
@@ -203,6 +207,7 @@
         lib_remote_cpu_util;
 
 float   lib_local_per_cpu_util[MAXCPUS];
+int     lib_cpu_map[MAXCPUS];
 
 int     *request_array;
 int     *response_array;
@@ -1045,6 +1050,37 @@
   }
 }
 #endif /* WANT_INTERVALS */
+
+void
+netlib_init_cpu_map() {
+
+  int i;
+  int num;
+#ifdef HAVE_MPCTL
+  i = 0;
+  /* I go back and forth on whether this should be the system-wide set
+     of calls, or if the processor set versions (sans the _SYS) should
+     be used.  at the moment I believe that the system-wide version
+     should be used. raj 2006-04-03 */
+  num = mpctl(MPC_GETNUMSPUS_SYS,0,0);
+  lib_cpu_map[i] = mpctl(MPC_GETFIRSTSPU_SYS,0,0);
+  for (i = 1;((i < num) && (i < MAXCPUS)); i++) {
+    lib_cpu_map[i] = mpctl(MPC_GETNEXTSPU_SYS,lib_cpu_map[i-1],0);
+  }
+  /* from here, we set them all to -1 because if we launch more
+     loopers than actual CPUs, well, I'm not sure why :) */
+  for (; i < MAXCPUS; i++) {
+    lib_cpu_map[i] = -1;
+  }
+
+#else
+  /* we assume that there is indeed a contiguous mapping */
+  for (i = 0; i < MAXCPUS; i++) {
+    lib_cpu_map[i] = i;
+  }
+#endif
+}
+
 
 /****************************************************************/
 /*                                                              */
@@ -1068,6 +1104,16 @@
     lib_local_per_cpu_util[i] = 0.0;
   }
 
+  /* on those systems where we know that CPU numbers may not start at
+     zero and be contiguous, we provide a way to map from a
+     contiguous, starting from 0 CPU id space to the actual CPU ids.
+     at present this is only used for the netcpu_looper stuff because
+     we ass-u-me that someone setting processor affinity from the
+     netperf commandline will provide a "proper" CPU identifier. raj
+     2006-04-03 */
+
+  netlib_init_cpu_map();
+
   if (debug) {
     fprintf(where,
             "netlib_init: request_array at %p\n",
@@ -1749,19 +1795,32 @@
   2004/12/13 */
 
 void
-bind_to_specific_processor(int processor_affinity)
+bind_to_specific_processor(int processor_affinity, int use_cpu_map)
 {
 
+  int mapped_affinity;
+
+  /* this is in place because the netcpu_looper processor affinity
+     ass-u-me-s a contiguous CPU id space starting with 0. for the
+     regular netperf/netserver affinity, we ass-u-me the user has used
+     a suitable CPU id even when the space is not contiguous and
+     starting from zero */
+  if (use_cpu_map) {
+    mapped_affinity = lib_cpu_map[processor_affinity];
+  }
+  else {
+    mapped_affinity = processor_affinity;
+  }
+
 #ifdef HAVE_MPCTL
-#include <sys/mpctl.h>
   /* indeed, at some point it would be a good idea to check the return
      status and pass-along notification of error... raj 2004/12/13 */
-  mpctl(MPC_SETPROCESS_FORCE, processor_affinity, getpid());
+  mpctl(MPC_SETPROCESS_FORCE, mapped_affinity, getpid());
 #elif HAVE_PROCESSOR_BIND
 #include <sys/types.h>
 #include <sys/processor.h>
 #include <sys/procset.h>
-  processor_bind(P_PID,P_MYID,processor_affinity,NULL);
+  processor_bind(P_PID,P_MYID,mapped_affinity,NULL);
 #elif HAVE_BINDPROCESSOR
 #include <sys/processor.h>
   /* this is the call on AIX.  It takes a "what" of BINDPROCESS or
@@ -1772,7 +1831,7 @@
      would seem that the my_cpu() call returns the current CPU on
      which we are running rather than the CPU binding, so it's return
      value will not tell you if you are bound vs unbound. */
-  bindprocessor(BINDPROCESS,getpid(),(cpu_t)processor_affinity);
+  bindprocessor(BINDPROCESS,getpid(),(cpu_t)mapped_affinity);
 #elif HAVE_SCHED_SETAFFINITY
 #include <sched.h>
   /* gee, I wonder what we would do on a system with > 32 or 64
@@ -1780,7 +1839,7 @@
   unsigned long       this_mask;
   unsigned int        len = sizeof(this_mask);
 
-  this_mask = 1 << processor_affinity;
+  this_mask = 1 << mapped_affinity;
 
   if (sched_setaffinity(getpid(), len, &this_mask)) {
     if (debug) {
@@ -1798,7 +1857,7 @@
   /* really should be checking a return code one of these days. raj
      2005/08/31 */ 
 
-  bind_to_cpu_id(getpid(), processor_affinity,0);
+  bind_to_cpu_id(getpid(), mapped_affinity,0);
 
 #elif WIN32
 
@@ -1807,10 +1866,10 @@
     ULONG_PTR ProcessAffinityMask;
     ULONG_PTR SystemAffinityMask;
     
-    if ((processor_affinity < 0) || 
-	(processor_affinity > MAXIMUM_PROCESSORS)) {
+    if ((mapped_affinity < 0) || 
+	(mapped_affinity > MAXIMUM_PROCESSORS)) {
       fprintf(where,
-	      "Invalid processor_affinity specified: %d\n", processor_affinity);      fflush(where);
+	      "Invalid processor_affinity specified: %d\n", mapped_affinity);      fflush(where);
       return;
     }
     
@@ -1824,7 +1883,7 @@
 	exit(1);
       }
     
-    AffinityMask = (ULONG_PTR)1 << processor_affinity;
+    AffinityMask = (ULONG_PTR)1 << mapped_affinity;
     
     if (AffinityMask & ProcessAffinityMask) {
       if (!SetThreadAffinityMask( GetCurrentThread(), AffinityMask)) {
@@ -1833,7 +1892,7 @@
       }
     } else if (debug) {
       fprintf(where,
-	      "Processor affinity set to CPU# %d\n", processor_affinity);
+	      "Processor affinity set to CPU# %d\n", mapped_affinity);
       fflush(where);
     }
   }
@@ -2061,7 +2120,7 @@
   local_proc_affinity = netperf_request.content.dummy;
 
   if (local_proc_affinity != -1) {
-    bind_to_specific_processor(local_proc_affinity);
+    bind_to_specific_processor(local_proc_affinity,0);
   } 
 
 }

Modified: trunk/src/netlib.h
===================================================================
--- trunk/src/netlib.h	2006-03-31 21:27:06 UTC (rev 64)
+++ trunk/src/netlib.h	2006-04-04 01:09:54 UTC (rev 65)
@@ -477,7 +477,7 @@
 extern  double  calc_thruput_interval(double units_received,double elapsed);
 extern  float   calibrate_local_cpu(float local_cpu_rate);
 extern  float   calibrate_remote_cpu();
-extern  void    bind_to_specific_processor(int processor_affinity);
+extern  void    bind_to_specific_processor(int processor_affinity,int use_cpu_map);
 #ifndef WIN32
 
 /* WIN32 requires that at least one of the file sets to select be

Modified: trunk/src/netsh.c
===================================================================
--- trunk/src/netsh.c	2006-03-31 21:27:06 UTC (rev 64)
+++ trunk/src/netsh.c	2006-04-04 01:09:54 UTC (rev 65)
@@ -637,7 +637,7 @@
       break_args(optarg,arg1,arg2);
       if (arg1[0]) {
 	local_proc_affinity = convert(arg1);
-	bind_to_specific_processor(local_proc_affinity);
+	bind_to_specific_processor(local_proc_affinity,0);
       }
       if (arg2[0]) {
 	remote_proc_affinity = convert(arg2);



More information about the netperf-dev mailing list