/*----------------------------------------------------------------------------*/ /* * Performance-Monitoring Counters Library, for Intel/AMD Processors and Linux * Author: Don Heller, dheller@scl.ameslab.gov * Last revised: 19 January 2001 */ /*----------------------------------------------------------------------------*/ /* usage, Pentium Pro/II/III: * rabbit6.sh [trials] [cutoff] * rabbit6 -i scripts/in.perinstr0 0 [trials] [cutoff] * rabbit6 -i scripts/in.perinstr1 1 [trials] [cutoff] * * usage, Pentium: * rabbit6.5.sh [trials] [cutoff] * rabbit6 -i scripts/in.perinstr0.5 0 [trials] [cutoff] * * usage, Athlon: * (someone needs to write the input files and modify this code) */ /*----------------------------------------------------------------------------*/ /* measure a function, print "interesting" ratios of events */ void Test_info(int len); // general initialization void Test_init(int len); // before each test void Test(int len); // the test itself /* default values for command line */ #define TRIALS 3 #define CUTOFF 0.01 #ifdef DEBUG // look for problems with mis-aligned local double int fht_first = 0; char debug_fmt[] = "%4s &%-4s = 0x%08x\n"; #define ADDR(W,X) \ if ((int)(&X) & 7) { printf(debug_fmt, W, #X, (unsigned int)&X); } #endif #ifndef STATIC #define STATIC #endif /* this particular example is an FFT and its inverse * try it on various sizes */ #define TestLen (256*1024) /* power of 2 */ #define TestStart (16*1024) #define TestStop (16*1024) /* up to TestLen */ /*----------------------------------------------------------------------------*/ #include /* for sqrt() */ #include /* for atoi(), atof() */ #include /*----------------------------------------------------------------------------*/ int main(int argc, char * argv[]) { pmc_control_t Ctl = pmc_control_null; pmc_counter_t *a, c; pmc_data_t t0, t1; int i,j,k, m,n, len, out, e; /* command-line default values */ int flag = 0, trials = TRIALS; double cutoff = CUTOFF; /* initialize internal data structures, read command-line arguments */ if (pmc_getargs(stderr, argv[0], &argc, &argv, &Ctl) == FALSE) { exit(RABBIT_FAILURE); } if (argc > 0) { flag = atoi(argv[0]); } else { exit(RABBIT_FAILURE); } if (argc > 1) { trials = atoi(argv[1]); } if (argc > 2) { cutoff = atof(argv[2]); } if (pmc_open(0) == FALSE) /* open /dev/pmc */ { exit(RABBIT_FAILURE); } for (len = TestStart; len <= TestStop; len *= 2) { Test_info(len); // prepare for the test, once only pmc_counter_init(&c, &Ctl); /* for (out = 0; out < Ctl.num_counters; out++) { ... } */ for (i = 0, out = 0; i < Ctl.event_pairs; i++) { for (j = 0; j < Ctl.replication; j++, out++) { a = &Ctl.counters[out]; pmc_counter_reset(a); pmc_select(a); for (k = 0; k < trials; k++) { Test_init(len); // prepare for the test pmc_read(&t0); Test(len); // the test itself pmc_read(&t1); pmc_accumulate(a,&t1,&t0); // pmc_accumulate(&c,&t1,NULL); } pmc_accumulate_counter(&c,a); } } // print cycles, instruction count printf("min, mean, max, std. dev. over %lld trials\n", pmc_intervals(&c)); printf("cycles: "); printf(" %10lld", pmc_min_cycles(&c)); printf(" %10.0f", pmc_mean_cycles(&c)); printf(" %10lld", pmc_max_cycles(&c)); printf(" %10.0f", sqrt(pmc_variance_cycles(&c))); printf("\n"); e = 1 - flag; printf("instructions: "); printf(" %10lld", pmc_min_events(&c,e)); printf(" %10.0f", pmc_mean_events(&c,e)); printf(" %10lld", pmc_max_events(&c,e)); printf(" %10.0f", sqrt(pmc_variance_events(&c,e))); printf("\n"); printf("events per cycle\n"); printf(" min, mean, max, std. dev. over %d trials, cutoff = %f\n", trials, cutoff); for (i = 0, out = 0; i < Ctl.event_pairs; i++) { for (j = 0; j < Ctl.replication; j++, out++) { a = &Ctl.counters[out]; if (pmc_mean_events_per_cycle(a,flag) > cutoff) { e = Ctl.events[i][flag]; printf("0x%02x %-30.30s", e, pmc_event_name(e,flag)); printf(" %10.6f", pmc_min_events_per_cycle(a,flag)); printf(" %10.6f", pmc_mean_events_per_cycle(a,flag)); printf(" %10.6f", pmc_max_events_per_cycle(a,flag)); printf(" %10.6f", sqrt(pmc_variance_events_per_cycle(a,flag))); printf("\n"); } } } printf("events per instruction retired\n"); printf(" min, mean, max, std. dev. over %d trials, cutoff = %f\n", trials, cutoff); if (flag == 0) { m = 0; n = 1; } else { m = 1; n = 0; } for (i = 0, out = 0; i < Ctl.event_pairs; i++) { for (j = 0; j < Ctl.replication; j++, out++) { a = &Ctl.counters[out]; if (pmc_mean_ratio_events(a,m,n) > cutoff) { e = Ctl.events[i][m]; printf("0x%02x %-30.30s", e, pmc_event_name(e,m)); printf(" %10.6f", pmc_min_ratio_events(a,m,n)); printf(" %10.6f", pmc_mean_ratio_events(a,m,n)); printf(" %10.6f", pmc_max_ratio_events(a,m,n)); printf(" %10.6f", sqrt(pmc_variance_ratio_events(a,m,n))); printf("\n"); } } } } pmc_close(); /* close /dev/pmc */ exit(RABBIT_SUCCESS); } /*----------------------------------------------------------------------------*/ /* This example is modified from one of Al Aburto's benchmark programs. * From all the comments in the code, I hope it's legal to do this! */ double real[2*TestLen + 1]; double imag[2*TestLen + 1]; void fht(double * fz, int n); void ifft(int n, double * real, double * imag); void fft(int n, double * real, double * imag); // void realfft(int n, double * real); // void realifft(int n, double * real); void Test_info(int len) { printf("len = %d\n", len); #ifdef DEBUG printf("addr real = 0x%08x\n", (unsigned int)&real); // aligned properly? printf("addr imag = 0x%08x\n", (unsigned int)&imag); #endif } void Test_init(int len) { int i; for (i = 0; i < len; i++) { real[i] = (double) i; imag[i] = 0.0; } } void Test(int len) /* at most TestLen */ { fft(len, real, imag); ifft(len, real, imag); } /*----------------------------------------------------------------------------*/ /* ** FFT and FHT routines ** Copyright 1988, 1993; Ron Mayer ** ** fht(fz,n); ** Does a hartley transform of "n" points in the array "fz". ** fft(n,real,imag) ** Does a fourier transform of "n" points of the "real" and ** "imag" arrays. ** ifft(n,real,imag) ** Does an inverse fourier transform of "n" points of the "real" ** and "imag" arrays. ** realfft(n,real) ** Does a real-valued fourier transform of "n" points of the ** "real" and "imag" arrays. The real part of the transform ends ** up in the first half of the array and the imaginary part of the ** transform ends up in the second half of the array. ** realifft(n,real) ** The inverse of the realfft() routine above. ** ** ** NOTE: This routine uses at least 2 patented algorithms, and may be ** under the restrictions of a bunch of different organizations. ** Although I wrote it completely myself; it is kind of a derivative ** of a routine I once authored and released under the GPL, so it ** may fall under the free software foundation's restrictions; ** it was worked on as a Stanford Univ project, so they claim ** some rights to it; it was further optimized at work here, so ** I think this company claims parts of it. The patents are ** held by R. Bracewell (the FHT algorithm) and O. Buneman (the ** trig generator), both at Stanford Univ. ** If it were up to me, I'd say go do whatever you want with it; ** but it would be polite to give credit to the following people ** if you use this anywhere: ** Euler - probable inventor of the fourier transform. ** Gauss - probable inventor of the FFT. ** Hartley - probable inventor of the hartley transform. ** Buneman - for a really cool trig generator ** Mayer(me) - for authoring this particular version and ** including all the optimizations in one package. ** Thanks, ** Ron Mayer; mayer@acuson.com ** */ #define GOOD_TRIG // #include "trigtbl.h" /* start of trigtbl.h */ /* ** Please only distribute this with it's associated FHT routine. ** This algorithm is apparently patented(!) and the code copyrighted. ** See the comment with the fht routine for more info. ** -Thanks, ** Ron Mayer */ #ifdef GOOD_TRIG #else #define FAST_TRIG #endif #if defined(GOOD_TRIG) #define FHT_SWAP(a,b,t) {(t)=(a); (a)=(b); (b)=(t);} #define TRIG_VARS \ int t_lam = 0; #define TRIG_INIT(k,c,s) \ { \ int i; \ for (i = 2 ; i<=k ; i++) \ {coswrk[i] = costab[i]; sinwrk[i] = sintab[i];} \ t_lam = 0; \ c = 1; \ s = 0; \ } #define TRIG_NEXT(k,c,s) \ { \ int i,j; \ (t_lam)++; \ for (i = 0 ; !((1<1) \ { \ for (j = k-i+2 ; (1<>1; (!((k2^=k)&k)); k >>= 1); if (k1 > k2) { // double a; a = fz[k1]; fz[k1] = fz[k2]; fz[k2] = a; } } for (k = 0; (1<> 1; fi = fz; gi = fi + kx; fn = fz + n; do { // double g0,f0,f1,g1,f2,g2,f3,g3; f1 = fi[0 ] - fi[k1]; f0 = fi[0 ] + fi[k1]; f3 = fi[k2] - fi[k3]; f2 = fi[k2] + fi[k3]; fi[k2] = f0 - f2; fi[0 ] = f0 + f2; fi[k3] = f1 - f3; fi[k1] = f1 + f3; g1 = gi[0 ] - gi[k1]; g0 = gi[0 ] + gi[k1]; g3 = SQRT2 * gi[k3]; g2 = SQRT2 * gi[k2]; gi[k2] = g0 - g2; gi[0 ] = g0 + g2; gi[k3] = g1 - g3; gi[k1] = g1 + g3; gi += k4; fi += k4; } while (fi