/*----------------------------------------------------------------------------*/

/*
 * Performance-Monitoring Counters Library, for Intel/AMD Processors and Linux
 * Author:  Don Heller, dheller@scl.ameslab.gov
 * Last revised:  19 January 2001
 *
 * use with rabbit4.sh
 * some output from a Pentium II is given
 */

/*----------------------------------------------------------------------------*/

/* select a test case, print "interesting" events */

/* Test case compiled into the measurement loop of main(); it may be
 * predefined before this point (the #ifndef keeps an existing value),
 * otherwise case 0 (empty body) is used. */
#ifndef TESTNUMBER
#define TESTNUMBER 0
#endif
	/* cases 0 through 18 have bodies in main(); case 19 is an empty placeholder */

/* default number of measurements taken per selected counter set */
#define TRIALS 1000
/* an event line is printed only when its mean count exceeds this value */
#define CUTOFF 0.05

/*----------------------------------------------------------------------------*/

#include <pmc_lib.h>
#include <math.h>
#include <stdlib.h>

/*----------------------------------------------------------------------------*/

/* File-scope read buffer; used as the pmc_read() destination in test case 18,
 * in contrast to the automatic buffers t1/t2 used by cases 16 and 17. */
pmc_data_t t3;

/*
 * Measure one small code sequence, selected at compile time by TESTNUMBER,
 * bracketed by two pmc_read() calls.  The sequence is repeated `trials`
 * times for every selected counter set, the differences are accumulated
 * with pmc_accumulate(), and min / mean / max / std.dev. are printed for
 * each event whose mean exceeds CUTOFF (plus cycle statistics unless
 * PMC_READ_KERNEL_MODE is defined).
 *
 * argc, argv: handed to pmc_getargs(); if argc is still positive afterwards,
 *             argv[0] is parsed as the number of trials.
 *
 * Exits with RABBIT_FAILURE when pmc_getargs() or pmc_open() return FALSE,
 * otherwise with RABBIT_SUCCESS.
 *
 * Inline-asm note (tests 6-15): the templates address memory through %ebx
 * explicitly, so the pointer operand uses the single-register constraint "b".
 * The previous spelling "ebx" is read by GCC as the three alternatives
 * 'e', 'b' and 'x', which does not guarantee that the pointer is in %ebx.
 * A "memory" clobber is listed because the asm stores through that pointer.
 */
int main(int argc, char * argv[])
{
  pmc_control_t Ctl = pmc_control_null;

  int i,j,k, out, trials = TRIALS;
  pmc_data_t t0, t1, t2;

  /* initialize internal data structures, read command-line arguments */

  Ctl.clean = 10;
  Ctl.stats = 1;
  for (k = 0; k < pmc_event_counters; k++)
    { Ctl.os[k] = 0; }

  if (pmc_getargs(stderr, argv[0], &argc, &argv, &Ctl) == FALSE)
    { exit(RABBIT_FAILURE); }

  /* NOTE(review): presumably pmc_getargs() leaves only the unconsumed
   * arguments in argc/argv -- confirm against pmc_lib.  atoi() yields 0 for
   * non-numeric text, in which case the measurement loop runs zero times. */
  if (argc > 0) { trials = atoi(argv[0]); }

  printf("test number = %d, trials = %d\n", TESTNUMBER, trials);

  if (pmc_open(0) == FALSE)		/* open /dev/pmc */
    { exit(RABBIT_FAILURE); }

  /* `out` walks Ctl.counters[] linearly while (i,j) enumerate
   * event pair x replication;
   * for (out = 0; out < Ctl.num_counters; out++) { ... } */

  for (i = 0, out = 0; i < Ctl.event_pairs; i++) {
    for (j = 0; j < Ctl.replication; j++, out++) {
      pmc_select(&Ctl.counters[out]);
      for (k = 0; k < trials; k++) {
	pmc_read(&t0);			/* start of the measured interval */

#if TESTNUMBER == 0
	  /* nothing */

// test number 0
// Event                               Min      Mean      Max    Std.Dev.
// 0x43  67 data_mem_refs                1     1.000        7    0.048989
// 0x80 128 ifu_ifetch                   1     1.000       10    0.056862
// 0x79 121 cpu_clk_unhalted             1     1.014      407    2.344042
// 0xc2 194 uops_retired                 1     1.000        1    0.000000

//      pushl -1040(%ebp)	<<< overhead that is not measured
//      call pmc_read
//      pushl -1048(%ebp)	<<< overhead that cannot be removed
//      call pmc_read

#endif

	/* tests 1-5: cpuid with different ways of loading eax / different leaves */

#if TESTNUMBER == 1
	    __asm__ __volatile__ (
	        "xorl %%eax,%%eax\n\t"            /* eax = 0 */
	        "cpuid"                           /* registers affected */
	        :                                 /* output */
	        :                                 /* input */
	        : "eax", "ebx", "ecx", "edx"      /* clobbered */
	      );
#endif

#if TESTNUMBER == 2
	    __asm__ __volatile__ (
	        "movl $0,%%eax\n\t"               /* eax = 0 */
	        "cpuid"                           /* registers affected */
	        :                                 /* output */
	        :                                 /* input */
	        : "eax", "ebx", "ecx", "edx"      /* clobbered */
	      );
#endif

#if TESTNUMBER == 3
	    __asm__ __volatile__ (
	        "movl $1,%%eax\n\t"               /* eax = 1 */
	        "cpuid"                           /* registers affected */
	        :                                 /* output */
	        :                                 /* input */
	        : "eax", "ebx", "ecx", "edx"      /* clobbered */
	      );
#endif

#if TESTNUMBER == 4
	    __asm__ __volatile__ (
	        "movl $2,%%eax\n\t"               /* eax = 2 */
	        "cpuid"                           /* registers affected */
	        :                                 /* output */
	        :                                 /* input */
	        : "eax", "ebx", "ecx", "edx"      /* clobbered */
	      );
#endif

#if TESTNUMBER == 5
	    __asm__ __volatile__ (
	        "movl $3,%%eax\n\t"               /* eax = 3 */
	        "cpuid"                           /* registers affected */
	        :                                 /* output */
	        :                                 /* input */
	        : "eax", "ebx", "ecx", "edx"      /* clobbered */
	      );
#endif

	/* tests 6-15: rdtsc / rdpmc sequences storing into t2 through %ebx;
	 * rdtsc result goes to byte offsets 0/4, rdpmc with ecx = 0 to 8/12,
	 * rdpmc with ecx = 1 to 16/20 */

#if TESTNUMBER == 6
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: &t2 in ebx */
      : "eax", "edx", "memory"          /* clobbered */
    );
#endif

#if TESTNUMBER == 7
  __asm__ __volatile__ (
      "xorl %%ecx,%%ecx\n\t"            /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: &t2 in ebx */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 8
  __asm__ __volatile__ (
      "movl $0,%%ecx\n\t"               /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: &t2 in ebx */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 9
  __asm__ __volatile__ (
      "movl $1,%%ecx\n\t"               /* ecx = 1 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,16(%%ebx)\n\t"
      "movl %%edx,20(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: &t2 in ebx */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 10
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "xorl %%ecx,%%ecx\n\t"            /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: &t2 in ebx */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 11
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "movl $0,%%ecx\n\t"               /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: &t2 in ebx */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 12
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "xorl %%ecx,%%ecx\n\t"            /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)\n\t"
      "movl $1,%%ecx\n\t"               /* ecx = 1 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,16(%%ebx)\n\t"
      "movl %%edx,20(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: &t2 in ebx */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 13
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "movl $0,%%ecx\n\t"               /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)\n\t"
      "movl $1,%%ecx\n\t"               /* ecx = 1 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,16(%%ebx)\n\t"
      "movl %%edx,20(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: &t2 in ebx */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 14
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "xorl %%ecx,%%ecx\n\t"            /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)\n\t"
      "incl %%ecx\n\t"                  /* ecx = 1 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,16(%%ebx)\n\t"
      "movl %%edx,20(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: &t2 in ebx */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 15
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "movl $0,%%ecx\n\t"               /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)\n\t"
      "incl %%ecx\n\t"                  /* ecx = 1 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,16(%%ebx)\n\t"
      "movl %%edx,20(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: &t2 in ebx */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

	/* tests 16-18: one extra pmc_read() into different destination buffers
	 * (t1 and t2 are automatic, t3 has file scope) */

#if TESTNUMBER == 16
	  pmc_read(&t1);
#endif

#if TESTNUMBER == 17
	  pmc_read(&t2);
#endif

#if TESTNUMBER == 18
	  pmc_read(&t3);
#endif

#if TESTNUMBER == 19
	  /* empty placeholder for a further test case */
#endif

	pmc_read(&t1);			/* end of the measured interval */
	pmc_accumulate(&Ctl.counters[out], &t1, &t0);
      }
    }
  }

  pmc_close();				/* close /dev/pmc */

  /* report: same (i,j,out) enumeration as the measurement loops above */
  {
    int e;
    pmc_events_t Min, Max;
    double Mean, StdDev;

    printf("Event                                       Min      Mean      Max    Std.Dev.\n");
    for (i = 0, out = 0; i < Ctl.event_pairs; i++) {
      for (j = 0; j < Ctl.replication; j++, out++) {
	for (k = 0; k < pmc_event_counters; k++) {
	  Min  = pmc_min_events(&Ctl.counters[out],k);
	  Mean = pmc_mean_events(&Ctl.counters[out],k);
	  Max  = pmc_max_events(&Ctl.counters[out],k);
	  StdDev = sqrt(pmc_variance_events(&Ctl.counters[out],k));
	  /* only "interesting" events: skip those that (almost) never fired */
	  if (Mean > CUTOFF) {
	    e = Ctl.events[i][k];
	    /* NOTE(review): %lld assumes pmc_events_t is long long -- confirm in pmc_lib.h */
	    printf("0x%02x %3d %-30s %7lld %9.3f %8lld %11.6f\n",
		  e, e, pmc_event_name(e,k),
		  Min, Mean, Max, StdDev);
	  }
	}
      }
    }

#ifndef PMC_READ_KERNEL_MODE
    printf("Cycles                                      Min      Mean      Max    Std.Dev.\n");
    for (i = 0, out = 0; i < Ctl.event_pairs; i++) {
      for (j = 0; j < Ctl.replication; j++, out++) {
	  Min  = pmc_min_cycles(&Ctl.counters[out]);
	  Mean = pmc_mean_cycles(&Ctl.counters[out]);
	  Max  = pmc_max_cycles(&Ctl.counters[out]);
	  StdDev = sqrt(pmc_variance_cycles(&Ctl.counters[out]));
	  printf("cycles                                  %7lld %9.3f %8lld %11.6f\n",
		  Min, Mean, Max, StdDev);
      }
    }
#endif
  }

  exit(RABBIT_SUCCESS);
}

/*----------------------------------------------------------------------------*/
