/*----------------------------------------------------------------------------*/

/*
 * Performance-Monitoring Counters Library, for Intel/AMD Processors and Linux
 * Author:  Don Heller, dheller@scl.ameslab.gov
 * Last revised:  19 January 2001
 */

/*----------------------------------------------------------------------------*/

/*
 * TESTNUMBER selects, at compile time, which code fragment main() times.
 * It may be predefined by the build (e.g. -DTESTNUMBER=n); otherwise it
 * defaults to 0, the empty fragment.
 */
#ifndef TESTNUMBER
#define TESTNUMBER 0
#endif
        // valid test numbers: 0 up to 29

/*
 * Time the test case selected at compile time by TESTNUMBER, and report
 * every event that was counted at least once in every trial.
 * usage:  rabbit8 [options] [trials]
 * see also rabbit8.sh
 */

/*----------------------------------------------------------------------------*/

#include <pmc_lib.h>
#include <stdlib.h>
#include <math.h>

/*----------------------------------------------------------------------------*/

/*
 * File-scope measurement state used by main().
 *   t0, t1  snapshots filled by pmc_read() before and after each timed
 *           fragment (t1 is also the target of test 16).
 *   t2      target of the inline-assembly stores in tests 6-15 and of the
 *           pmc_read() in test 17.
 *   c       copy of Ctl.counters[0]; passed to pmc_accumulate_clock() and
 *           used as the source of the reported cycle statistics.
 */
pmc_data_t t0, t1, t2;
pmc_counter_t c;

/*
 * Benchmark driver: times the code fragment selected by TESTNUMBER.
 *
 * For every counter configuration in Ctl (event_pairs x replication) the
 * fragment is run `trials` times, bracketed by pmc_read() calls, and the
 * differences are accumulated.  Afterwards the min / mean / max / standard
 * deviation of the cycle count are printed, followed by the same statistics
 * for every event whose minimum count over all trials is greater than zero.
 *
 * argc, argv: command line; handed to pmc_getargs(), which receives pointers
 *             to both.  The code below treats argv[0] *after* that call as
 *             the optional trial count (default 1000).
 *
 * Does not return: exits with RABBIT_SUCCESS, or with RABBIT_FAILURE when
 * argument parsing fails, the trial count is not positive, or pmc_open()
 * fails.
 *
 * NOTE(review): the inline assembly uses 32-bit registers and 32-bit
 * addressing, so this file appears to target 32-bit x86 only -- confirm.
 */
int main(int argc, char * argv[])
{
  pmc_control_t Ctl = pmc_control_null;

  int e, i,j,k, out, trials = 1000;
  pmc_counter_t * a;
  pmc_data_t t3;	/* local target for test 18 (t1 and t2 are globals) */

#if (19 <= TESTNUMBER) && (TESTNUMBER <= 20)
  int i19;
  double a19[10000], b19[10000];
#endif

  /* initialize internal data structures, read command-line arguments */

  Ctl.clean = 10;
  Ctl.stats = 1;	/* we need the min, mean and max */
  if (pmc_getargs(stderr, argv[0], &argc, &argv, &Ctl) == FALSE)
    { exit(RABBIT_FAILURE); }

  if (argc > 0) { trials = atoi(argv[0]); }

  /*
   * atoi() yields 0 for non-numeric input; with no trials the statistics
   * printed below would be computed over empty accumulators, so reject it
   * here, before the device is opened.
   */
  if (trials < 1) {
    fprintf(stderr, "trials must be a positive integer\n");
    exit(RABBIT_FAILURE);
  }

  printf("test number = %d, trials = %d\n", TESTNUMBER, trials);

  // pmc_counter_init(&c, &Ctl);
  c = Ctl.counters[0];

  if (pmc_open(0) == FALSE)		/* open /dev/pmc */
    { exit(RABBIT_FAILURE); }

  /* for (out = 0; out < Ctl.num_counters; out++) { ... } */

  /* `out` is the flat index into Ctl.counters: event pair i, replication j */
  for (i = 0, out = 0; i < Ctl.event_pairs; i++) {
    for (j = 0; j < Ctl.replication; j++, out++) {
      a = &Ctl.counters[out];
      pmc_select(a);
      /* two reads before the timed loop -- presumably a warm-up; confirm */
      pmc_read(&t0); pmc_read(&t1);
      for (k = 0; k < trials; k++) {
	pmc_read(&t0);
//...........................................................................
#if TESTNUMBER == 0
	  /* nothing */
#endif

/* Tests 1-5: cpuid with eax = 0, 0, 1, 2, 3 (tests 1 and 2 differ only in
 * how eax is zeroed). */
#if TESTNUMBER == 1
	    __asm__ __volatile__ (
	        "xorl %%eax,%%eax\n\t"            /* eax = 0 */
	        "cpuid"                           /* registers affected */
	        :                                 /* output */
	        :                                 /* input */
	        : "eax", "ebx", "ecx", "edx"      /* clobbered */
	      );
#endif

#if TESTNUMBER == 2
	    __asm__ __volatile__ (
	        "movl $0,%%eax\n\t"               /* eax = 0 */
	        "cpuid"                           /* registers affected */
	        :                                 /* output */
	        :                                 /* input */
	        : "eax", "ebx", "ecx", "edx"      /* clobbered */
	      );
#endif

#if TESTNUMBER == 3
	    __asm__ __volatile__ (
	        "movl $1,%%eax\n\t"               /* eax = 1 */
	        "cpuid"                           /* registers affected */
	        :                                 /* output */
	        :                                 /* input */
	        : "eax", "ebx", "ecx", "edx"      /* clobbered */
	      );
#endif

#if TESTNUMBER == 4
	    __asm__ __volatile__ (
	        "movl $2,%%eax\n\t"               /* eax = 2 */
	        "cpuid"                           /* registers affected */
	        :                                 /* output */
	        :                                 /* input */
	        : "eax", "ebx", "ecx", "edx"      /* clobbered */
	      );
#endif

#if TESTNUMBER == 5
	    __asm__ __volatile__ (
	        "movl $3,%%eax\n\t"               /* eax = 3 */
	        "cpuid"                           /* registers affected */
	        :                                 /* output */
	        :                                 /* input */
	        : "eax", "ebx", "ecx", "edx"      /* clobbered */
	      );
#endif

/*
 * Tests 6-15: rdtsc and/or rdpmc, storing the results through ebx into t2.
 * The rdtsc result goes to byte offsets 0/4, counter 0 to 8/12 and
 * counter 1 to 16/20 -- this assumes the field layout of pmc_data_t;
 * confirm against pmc_lib.h.
 *
 * The template names %ebx directly, so the input operand must be pinned to
 * that register with the single-letter constraint "b".  The asm writes to
 * t2 behind the compiler's back, hence the "memory" clobber.
 */
#if TESTNUMBER == 6
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: ebx = &t2 */
      : "eax", "edx", "memory"          /* clobbered */
    );
#endif

#if TESTNUMBER == 7
  __asm__ __volatile__ (
      "xorl %%ecx,%%ecx\n\t"            /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: ebx = &t2 */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 8
  __asm__ __volatile__ (
      "movl $0,%%ecx\n\t"               /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: ebx = &t2 */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 9
  __asm__ __volatile__ (
      "movl $1,%%ecx\n\t"               /* ecx = 1 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,16(%%ebx)\n\t"
      "movl %%edx,20(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: ebx = &t2 */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 10
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "xorl %%ecx,%%ecx\n\t"            /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: ebx = &t2 */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 11
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "movl $0,%%ecx\n\t"               /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: ebx = &t2 */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 12
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "xorl %%ecx,%%ecx\n\t"            /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)\n\t"
      "movl $1,%%ecx\n\t"               /* ecx = 1 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,16(%%ebx)\n\t"
      "movl %%edx,20(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: ebx = &t2 */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 13
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "movl $0,%%ecx\n\t"               /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)\n\t"
      "movl $1,%%ecx\n\t"               /* ecx = 1 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,16(%%ebx)\n\t"
      "movl %%edx,20(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: ebx = &t2 */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 14
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "xorl %%ecx,%%ecx\n\t"            /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)\n\t"
      "incl %%ecx\n\t"                  /* ecx = 1 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,16(%%ebx)\n\t"
      "movl %%edx,20(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: ebx = &t2 */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

#if TESTNUMBER == 15
  __asm__ __volatile__ (
      "rdtsc\n\t"
      "movl %%eax,0(%%ebx)\n\t"
      "movl %%edx,4(%%ebx)\n\t"
      "movl $0,%%ecx\n\t"               /* ecx = 0 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,8(%%ebx)\n\t"
      "movl %%edx,12(%%ebx)\n\t"
      "incl %%ecx\n\t"                  /* ecx = 1 */
      "rdpmc\n\t"
#if defined(PMC_P6)
      "andl $255,%%edx\n\t"             /* clean the upper 24 bits */
#endif
      "movl %%eax,16(%%ebx)\n\t"
      "movl %%edx,20(%%ebx)"
      :                                 /* output */
      : "b" (&t2)                       /* input: ebx = &t2 */
      : "eax", "ecx", "edx", "memory"   /* clobbered */
    );
#endif

/* Tests 16-18: one extra pmc_read() into t1 (global, overwritten again
 * below), t2 (global) or t3 (local). */
#if TESTNUMBER == 16
	  pmc_read(&t1);
#endif

#if TESTNUMBER == 17
	  pmc_read(&t2);
#endif

#if TESTNUMBER == 18
	  pmc_read(&t3);
#endif

/* Tests 19-20: fill a 10000-element double array; test 20 additionally
 * scales it into a second array. */
#if TESTNUMBER == 19
          for (i19 = 0; i19 < 10000; i19++) { a19[i19] = i19; }
#endif

#if TESTNUMBER == 20
          for (i19 = 0; i19 < 10000; i19++) { a19[i19] = i19; }
          for (i19 = 0; i19 < 10000; i19++) { b19[i19] = a19[i19] * 10.0; }
#endif

/*
 * INSTR(instr) emits one asm statement holding 100 copies of `instr`
 * (20 lines of 5), declaring ecx as clobbered.  The expansion already ends
 * in ';', so uses below carry no semicolon of their own.
 */
#define INSTR(instr) \
  __asm__ __volatile__ ( \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" \
	instr "\n\t" instr "\n\t" instr "\n\t" instr "\n\t" instr \
	: : : "ecx");

/* Tests 21-28: 100 repetitions of a single ALU/LEA instruction on ecx
 * (test 28 repeats a two-instruction pair). */
#if TESTNUMBER == 21
  INSTR("addl $0,%%ecx")
#endif

#if TESTNUMBER == 22
  INSTR("addl $4,%%ecx")
#endif

#if TESTNUMBER == 23
  INSTR("movl $0,%%ecx")
#endif

#if TESTNUMBER == 24
  INSTR("movl $4,%%ecx")
#endif

#if TESTNUMBER == 25
  INSTR("incl %%ecx")
#endif

#if TESTNUMBER == 26
  INSTR("addl $1,%%ecx")
#endif

#if TESTNUMBER == 27
  INSTR("leal (%%ecx,%%ecx,8),%%ecx")
#endif

#if TESTNUMBER == 28
  INSTR("leal (%%ecx,%%ecx,8),%%ecx" "\n\t" "addl $1,%%ecx")
#endif

/* Test 29: empty counting loop; its `i` is block-local and shadows the
 * outer loop index without modifying it.
 * NOTE(review): an optimizing compiler may remove this loop entirely. */
#if TESTNUMBER == 29
  { register int i; for (i = 0; i < 512*1024; i++) { } }
#endif

//...........................................................................
	pmc_read(&t1);
	pmc_accumulate(a,&t1,&t0);
	/* NOTE(review): the clock accumulation passes NULL where the event
	 * accumulation passes &t0 -- presumably intentional; confirm against
	 * pmc_lib. */
	pmc_accumulate_clock(&c,&t1,NULL);
      }
    }
  }

  pmc_close();				/* close /dev/pmc */

  printf("number of trials = %10d                       "
         "min     mean   max  std.dev\n", trials);

  printf("%-49s %5d %8.2f %5d %8.2f\n",
    "cycles",
    (int)pmc_min_cycles(&c),
    pmc_mean_cycles(&c),
    (int)pmc_max_cycles(&c),
    sqrt(pmc_variance_cycles(&c))
    );

  /* same traversal order as the measurement loop above */
  for (i = 0, out = 0; i < Ctl.event_pairs; i++) {
    for (j = 0; j < Ctl.replication; j++, out++) {
      a = &Ctl.counters[out];
      for (k = 0; k < pmc_event_counters; k++) {
	e = Ctl.events[i][k];
	/* report only events seen at least once in every trial */
	if (pmc_min_events(a,k) > 0)
	  {
	    printf("0x%02x %3d %-40s %5d %8.2f %5d %8.2f\n",
		e, e, pmc_event_name(e,k),
		(int)pmc_min_events(a,k),
		pmc_mean_events(a,k),
		(int)pmc_max_events(a,k),
		sqrt(pmc_variance_events(a,k))
		);
	  }
      }
    }
  }

  printf("\n");

  exit(RABBIT_SUCCESS);
}

/*----------------------------------------------------------------------------*/
