Performance-Monitoring Counters Library, for Intel/AMD Processors and Linux
This example introduces
   pmc_counter_t       -- cycle and event accumulators, user interface
   stats               -- level of statistical detail
   pmc_intervals_t
   pmc_intervals()
   pmc_min_cycles()              -- also max, mean, variance
   pmc_min_events()
   pmc_nonzero_intervals()
   pmc_min_events_per_cycle()
   pmc_ratio_intervals()         -- event i / event j
   pmc_min_ratio_events()

Previous example       -- pmc_counter_t, intro
Download this example
Next example           -- data acquisition
Return to Main Menu


Compile with
    gcc -o menu7 -O `pmc_options` menu7.c -lm -lpmc
Try these examples (Pentium Pro/II/III):
    menu7 --e 192,194 -d
    menu7 --e 192,194
    menu7 --e 192,194 -Stats 0
    menu7 --e 192,194 -Stats 1
    menu7 --e 192,194 -Stats 2
#include <pmc_lib.h>
#include <math.h>       /* for sqrt() */
#include <stdio.h>      /* for printf(), stderr */
#include <stdlib.h>     /* for exit() */

/*
 * menu7 -- accumulate cycle and event counts over ten short timed
 * intervals, then print the sums and the per-interval statistics
 * (min, max, mean, variance, standard deviation) for
 *   - cycles and events,
 *   - events per cycle,
 *   - event ratios 0:1 and 1:0.
 *
 * Command-line options are parsed by pmc_getargs() into Ctl.
 * Exits with status 1 if argument parsing or pmc_open() fails,
 * and with status 0 otherwise.
 */
int main(int argc, char *argv[])
{
    pmc_control_t Ctl = pmc_control_null;
    pmc_data_t t0, t1;
    pmc_counter_t tot;
    pmc_intervals_t n, m, m01, m10;
    pmc_cycles_t elapsed;
    int i, j, k;

    if (pmc_getargs(stderr, argv[0], &argc, &argv, &Ctl) == FALSE) {
        exit(1);
    }

    pmc_counter_init(&tot, &Ctl);       /* initialize the accumulator */

    if (pmc_open(0) == FALSE) {         /* open /dev/pmc */
        exit(1);
    }

    pmc_select(&tot);

    for (k = 0; k < 10; k++) {
        pmc_read(&t0);                  /* read the counters */

        /* the work being measured */
        i = 0, j = 10000;
        while (i < j) {
            i++;
            j--;
        }

        pmc_read(&t1);                  /* read the counters */

        elapsed = pmc_accumulate(&tot, &t1, &t0);
        (void) elapsed;                 /* shown for illustration; not used here */
    }

    pmc_close();                        /* close /dev/pmc */

    /*
     * Integer results are cast to long long so that the argument type
     * matches the %lld conversion whatever the library typedefs are.
     */
    printf("tot.pmc_sum_cycles() = %16lld\n",
           (long long) pmc_sum_cycles(&tot));
    printf("tot.pmc_sum_events(0) = %16lld\n",
           (long long) pmc_sum_events(&tot,0));
    printf("tot.pmc_sum_events(1) = %16lld\n",
           (long long) pmc_sum_events(&tot,1));
    printf("\n");

    /* cycles and events */
    n = pmc_intervals(&tot);
    m = pmc_nonzero_intervals(&tot);
    printf("cycles and events (%d total, %d nonzero)\n", (int) n, (int) m);
    printf(" min %16lld %16lld %16lld\n",
           (long long) pmc_min_cycles(&tot),
           (long long) pmc_min_events(&tot,0),
           (long long) pmc_min_events(&tot,1));
    printf(" max %16lld %16lld %16lld\n",
           (long long) pmc_max_cycles(&tot),
           (long long) pmc_max_events(&tot,0),
           (long long) pmc_max_events(&tot,1));
    printf(" mean %16.6f %16.6f %16.6f\n",
           pmc_mean_cycles(&tot),
           pmc_mean_events(&tot,0),
           pmc_mean_events(&tot,1));
    printf(" variance %16.6f %16.6f %16.6f\n",
           pmc_variance_cycles(&tot),
           pmc_variance_events(&tot,0),
           pmc_variance_events(&tot,1));
    printf(" std.dev. %16.6f %16.6f %16.6f\n",
           sqrt(pmc_variance_cycles(&tot)),
           sqrt(pmc_variance_events(&tot,0)),
           sqrt(pmc_variance_events(&tot,1)));
    printf("\n");

    /* events per cycle */
    printf("events per cycle\n");
    printf(" min %16.6f %16.6f\n",
           pmc_min_events_per_cycle(&tot,0),
           pmc_min_events_per_cycle(&tot,1));
    printf(" max %16.6f %16.6f\n",
           pmc_max_events_per_cycle(&tot,0),
           pmc_max_events_per_cycle(&tot,1));
    printf(" mean %16.6f %16.6f\n",
           pmc_mean_events_per_cycle(&tot,0),
           pmc_mean_events_per_cycle(&tot,1));
    printf(" variance %16.6f %16.6f\n",
           pmc_variance_events_per_cycle(&tot,0),
           pmc_variance_events_per_cycle(&tot,1));
    printf(" std.dev. %16.6f %16.6f\n",
           sqrt(pmc_variance_events_per_cycle(&tot,0)),
           sqrt(pmc_variance_events_per_cycle(&tot,1)));
    printf("\n");

    /* event ratios, event 0 / event 1 and event 1 / event 0 */
    m01 = pmc_ratio_intervals(&tot,0,1);
    m10 = pmc_ratio_intervals(&tot,1,0);
    printf("event ratios, 0:1, 1:0 (%d, %d intervals)\n",
           (int) m01, (int) m10);
    printf(" min %16.6f %16.6f\n",
           pmc_min_ratio_events(&tot,0,1),
           pmc_min_ratio_events(&tot,1,0));
    printf(" max %16.6f %16.6f\n",
           pmc_max_ratio_events(&tot,0,1),
           pmc_max_ratio_events(&tot,1,0));
    printf(" mean %16.6f %16.6f\n",
           pmc_mean_ratio_events(&tot,0,1),
           pmc_mean_ratio_events(&tot,1,0));
    printf(" variance %16.6f %16.6f\n",
           pmc_variance_ratio_events(&tot,0,1),
           pmc_variance_ratio_events(&tot,1,0));
    printf(" std.dev. %16.6f %16.6f\n",
           sqrt(pmc_variance_ratio_events(&tot,0,1)),
           sqrt(pmc_variance_ratio_events(&tot,1,0)));

    exit(0);
}
Synopsis typedef unsigned long long int pmc_intervals_t; typedef struct { /* event codes */ pmc_selector_t selector; /* housekeeping */ int clean; /* clean == 0,+ve -- remove pmc_read() overhead? */ int stats; /* stats == 0,1,2 -- level of statistical detail */ /* pmc_read() overhead estimate (clean > 0) */ pmc_cycles_t overhead_cycles; pmc_events_t overhead_events[pmc_event_counters]; /* summation over all time intervals (stats == 0,1,2) */ pmc_cycles_t sum_cycles; pmc_events_t sum_events[pmc_event_counters]; /* from here, used with statistics calculations (stats == 1,2) */ /* number of measured intervals */ pmc_intervals_t intervals; /* minimum */ pmc_cycles_t min_cycles; pmc_events_t min_events[pmc_event_counters]; /* maximum */ pmc_cycles_t max_cycles; pmc_events_t max_events[pmc_event_counters]; /* mean */ double mean_cycles; double mean_events[pmc_event_counters]; /* sum of squares of deviations from the mean, used to compute variance */ double ssq_cycles; double ssq_events[pmc_event_counters]; /* measured nonzero intervals, for rates */ pmc_intervals_t nonzero_intervals; double sum_events_per_cycle[pmc_event_counters]; double min_events_per_cycle[pmc_event_counters]; double max_events_per_cycle[pmc_event_counters]; double mean_events_per_cycle[pmc_event_counters]; double ssq_events_per_cycle[pmc_event_counters]; /* from here, used with ratio calculations (stats == 2) */ /* ratio, event i / event j */ pmc_intervals_t ratio_intervals[pmc_event_counters][pmc_event_counters]; double sum_ratio[pmc_event_counters][pmc_event_counters]; double min_ratio[pmc_event_counters][pmc_event_counters]; double max_ratio[pmc_event_counters][pmc_event_counters]; double mean_ratio[pmc_event_counters][pmc_event_counters]; double ssq_ratio[pmc_event_counters][pmc_event_counters]; } pmc_counter_t; double pmc_seconds(const pmc_cycles_t c); /* summation over all intervals (stats == 0,1,2) */ pmc_cycles_t pmc_sum_cycles(const pmc_counter_t * const a); pmc_events_t pmc_sum_events(const 
pmc_counter_t * const a, const int i); /* from here, used with statistics calculations (stats == 1,2) */ pmc_intervals_t pmc_intervals(const pmc_counter_t * const a); pmc_intervals_t pmc_nonzero_intervals(const pmc_counter_t * const a); pmc_cycles_t pmc_min_cycles(const pmc_counter_t * const a); pmc_events_t pmc_min_events(const pmc_counter_t * const a, const int i); pmc_cycles_t pmc_max_cycles(const pmc_counter_t * const a); pmc_events_t pmc_max_events(const pmc_counter_t * const a, const int i); double pmc_mean_cycles(const pmc_counter_t * const a); double pmc_mean_events(const pmc_counter_t * const a, const int i); double pmc_variance_cycles(const pmc_counter_t * const a); double pmc_variance_events(const pmc_counter_t * const a, const int i); double pmc_min_events_per_cycle(const pmc_counter_t * const a, const int i); double pmc_max_events_per_cycle(const pmc_counter_t * const a, const int i); double pmc_mean_events_per_cycle(const pmc_counter_t * const a, const int i); double pmc_variance_events_per_cycle(const pmc_counter_t * const a, const int i); /* from here, used with ratio calculations (stats == 2) */ pmc_intervals_t pmc_ratio_intervals(const pmc_counter_t * const a, const int i, const int j); double pmc_min_ratio_events(const pmc_counter_t * const a, const int i, const int j); double pmc_max_ratio_events(const pmc_counter_t * const a, const int i, const int j); double pmc_mean_ratio_events(const pmc_counter_t * const a, const int i, const int j); double pmc_variance_ratio_events(const pmc_counter_t * const a, const int i, const int j);
Notes

This example further introduces pmc_select(), pmc_read() and pmc_accumulate(), emphasizing the statistical information gathered by pmc_accumulate() under control of the pmc_counter_t stats component.

The use of tot in the example can be summarized more simply as

    tot = 0                 pmc_counter_init(&tot, &Ctl);
    choose the events       pmc_counter_init(&tot, &Ctl);
    tell the processor      pmc_select(&tot);
    begin                   pmc_read(&t0);
    end                     pmc_read(&t1);
    tot += (t1 -= t0)       elapsed = pmc_accumulate(&tot, &t1, &t0);

The elapsed time is measured from pmc_open() or the most recent pmc_start(). t1 now has the results from the measured time interval, accessible as pmc_cycle(&t1) or pmc_event(&t1,i). tot retains information about all the measured time intervals, accessible as

    stats = 0, 1, 2
        pmc_sum_cycles()        pmc_sum_events()
        These are accumulated for every time interval.

    stats = 1, 2
        pmc_intervals()
        pmc_min_cycles()        pmc_min_events()
        pmc_max_cycles()        pmc_max_events()
        pmc_mean_cycles()       pmc_mean_events()
        pmc_variance_cycles()   pmc_variance_events()
        These are accumulated for every time interval.

        pmc_nonzero_intervals()
        pmc_min_events_per_cycle()
        pmc_max_events_per_cycle()
        pmc_mean_events_per_cycle()
        pmc_variance_events_per_cycle()
        These are accumulated only if the number of cycles in the time interval is nonzero.

    stats = 2
        pmc_ratio_intervals()           [event i / event j]
        pmc_min_ratio_events()
        pmc_max_ratio_events()
        pmc_mean_ratio_events()
        pmc_variance_ratio_events()
        These are accumulated only if event j occurs in the time interval.

The stats component of Ctl is set by the command-line option -Stats n. The stats component of tot is copied from Ctl.stats by pmc_counter_init(), but it can also be assigned directly. Changing stats once the measurements have started would not be a good idea. If nothing has been accumulated, the access functions will return 0; this could either mean nothing has happened, or the appropriate level of record-keeping was not activated.
The mean and variance are defined, for data values x(i), i = 1:N, as

    mean               = sum of x(i) / N
    variance           = sum of (x(i) - mean)**2 / (N-1)
    standard deviation = sqrt(variance)

but they are computed using a numerically stable algorithm that updates the mean and variance as each new data value is obtained. This limits the amount of storage required, but prevents the calculation of more extensive statistics.
Further Notes

In future implementations, we could extend the stats option to include the ability to
    1. ignore the first measurement, as this tends to be dominated by startup costs (see the -trim option);
    2. throw away outliers, which may be due to competition from the OS or other processes, or due to rare events in a ratio calculation.
However, it would be better to capture the startup information in a separate pmc_counter_t, or to do some binning operations.

As a reminder, we attach our measurements to the processor, not to the process, so it is possible that the OS and other processes contribute to the measured data taken by the current process, as the owner of /dev/pmc. Other implementations based on kernel modifications to the process state may not be so affected, but then the kernel or a loadable module would be required to maintain the performance statistics.
Forward References pmc_select(), pmc_read(), pmc_accumulate() pmc_print_results()

Performance-Monitoring Counters Library, for Intel/AMD Processors and Linux
Author: Don Heller, dheller@scl.ameslab.gov
Last revised: 2 August 2000