#ifndef PMC_ASM_H
#define PMC_ASM_H

/*----------------------------------------------------------------------------*/

/*
 * Performance-Monitoring Counters Library, for Intel/AMD Processors and Linux
 * Author:  Don Heller, dheller@scl.ameslab.gov
 * Last revised:  5 October 2001
 */

/*----------------------------------------------------------------------------*/

	/* assembler macros */

/*----------------------------------------------------------------------------*/

/* this file must be coordinated with pmc_arch.h
 *
 * note that none of these macros has a trailing ;
 *
 * The general structure of the GNU asm() is
 *   "code"
 *   : output registers (with destination)
 *   : input registers (with source)
 *   : other affected registers
 *
 * For more information see 'info gcc', especially the Extended ASM discussion.
 * The constraint "A" refers to the register pair "a" (low half) and "d"
 *   (high half) for 64-bit integers on 32-bit x86.
 */

/*----------------------------------------------------------------------------*/

/* generic instruction sequence */

/* Emit `instructions` with N loaded into the "c" register and take the
 * result from the "A" operand into buf.
 *
 * instructions  string literal holding the assembler text
 * N             must be valid for the first instruction
 * buf           must be a pmc_uint64_t lvalue
 *
 * NOTE(review): "A" names the d:a register pair for a 64-bit operand only
 * on 32-bit x86 -- confirm before building this for x86-64.
 */

#define PMC_ASM(instructions,N,buf) \
  __asm__ __volatile__ ( \
    instructions \
    : "=A" (buf) \
    : "c" (N) )

/*----------------------------------------------------------------------------*/

/* read a Model-Specific Register */

/* Read Model-Specific Register N into buf using rdmsr.
 *
 * N    must be a valid MSR number from pmc_arch.h
 * buf  must be a pmc_uint64_t lvalue
 *
 * rdmsr is a privileged instruction that can only be used in kernel mode
 */

#define PMC_ASM_READ_MSR(N,buf) \
  PMC_ASM( "rdmsr", N, buf )

/*----------------------------------------------------------------------------*/

/* write a Model-Specific Register */

/* Write the 64-bit value buf to Model-Specific Register N using wrmsr.
 *
 * N    must be a valid MSR number from pmc_arch.h; passed in the "c" register
 * buf  must be pmc_uint64_t; passed through the "A" operand
 *
 * wrmsr is a privileged instruction that can only be used in kernel mode
 */

#define PMC_ASM_WRITE_MSR(N,buf) \
  __asm__ __volatile__ ( \
    "wrmsr" \
    : /* no outputs */ \
    : "A" (buf), "c" (N) )

/*----------------------------------------------------------------------------*/

/* read the Time-Stamp Counter */

/* Read the Time-Stamp Counter into buf through the "A" output operand.
 *
 * buf  must be a pmc_uint64_t lvalue
 *
 * rdtsc has a permission bit that must be enabled
 */

#define PMC_ASM_READ_TSC(buf) \
  __asm__ __volatile__ ( \
    "rdtsc" \
    : "=A" (buf) )

/*----------------------------------------------------------------------------*/

/* read a Performance-Monitoring Counter */

/* buf must be pmc_uint64_t
 * N must be in the range [0,pmc_event_counters)
 * rdmsr is a privileged instruction that can only be used in kernel mode
 * rdpmc has a permission bit that must be enabled
 */

  /*
   * Pentium, Pentium/MMX:
   * The upper 24 bits of the Performance-Monitoring Counters are 0.
   *
   * Pentium Pro/II/III:
   * The upper 24 bits of the Time Stamp Counter are mapped to the upper 24
   * bits of the Performance-Monitoring Counters, so they must be cleared.
   *
   * Pentium 4:
   * unfinished
   *
   * Athlon:
   * The upper 16 bits of the Performance-Monitoring Counters are 0.
   */

/* Assembler text appended after a counter read.  On the P6 family it masks
 * %edx down to its low 8 bits; on every other processor it is empty.
 */
#ifndef PMC_P6
#define PMC_ASM_CLEAN
#else
#define PMC_ASM_CLEAN   "\n\tandl $255,%%edx"
#endif

/* Read one Performance-Monitoring Counter into buf, followed by
 * PMC_ASM_CLEAN.  The instruction used depends on PMC_READ_KERNEL_MODE.
 */
#if !defined(PMC_READ_KERNEL_MODE)
/* user mode: rdpmc, with the counter index N passed in the "c" register */
#define PMC_ASM_READ_PMC(N,buf) \
  PMC_ASM( "rdpmc" PMC_ASM_CLEAN, N, buf )
#else
/* kernel mode: rdmsr on the name formed by pasting PMC_ and N
 * (presumably an MSR number from pmc_arch.h -- verify there)
 */
#define PMC_ASM_READ_PMC(N,buf) \
  PMC_ASM( "rdmsr" PMC_ASM_CLEAN, PMC_ ## N, buf )
#endif

/*----------------------------------------------------------------------------*/

/* read a Control Register */

/* Copy Control Register N into buf with a single movl.
 *
 * N    must be a valid control register number (0, 2, 3, 4); it is
 *      stringified into the register name, so pass a literal digit
 * buf  must be a pmc_uint32_t lvalue
 *
 * this is a privileged instruction that can only be used in kernel mode
 */

#define PMC_ASM_READ_CR(N,buf) \
  __asm__ __volatile__ ( \
    "movl %%cr" #N ",%0" \
    : "=r" (buf) )

/*----------------------------------------------------------------------------*/

/* issue a serializing instruction */

/* Serialize execution by issuing cpuid with %eax cleared to 0.
 *
 * cpuid is the only non-privileged serializing instruction that is practical
 * to use.  Its side effect is to flush the prefetch queue.  All four
 * registers named below are declared clobbered.
 */

#define PMC_ASM_SERIALIZE \
  __asm__ __volatile__ ( \
    "xorl %%eax,%%eax\n\tcpuid" \
    : /* no outputs */ \
    : /* no inputs */ \
    : "eax", "ebx", "ecx", "edx" )

/*----------------------------------------------------------------------------*/

/* read the cycle counter */

/* Store the current Time-Stamp Counter at *buf: the low 32 bits at byte
 * offset 0 and the high 32 bits at byte offset 4.
 *
 * buf must be (pmc_data_t *) or (pmc_uint64_t *)
 * rdtsc has a permission bit that must be enabled
 *
 * The result is written through the pointer, not through an output operand,
 * so "memory" is declared clobbered.  Without it the compiler may assume
 * *buf is unchanged by this statement and keep using a stale value.
 */

#define PMC_ASM_READ_CLOCK(buf) __asm__ __volatile__ ( \
  "rdtsc\n\t" \
  "movl %%eax,0(%0)\n\t" \
  "movl %%edx,4(%0)" \
  : : "r" (buf) : "eax", "edx", "memory" )

/*----------------------------------------------------------------------------*/

/* read all the counters, cleaned */

/* buf must be (pmc_data_t *) or (pmc_uint64_t *)
 */

#if defined(PMC_P5)
#ifdef PMC_READ_KERNEL_MODE
  /* kernel mode, not safe to use rdpmc */

/* buf is (pmc_uint64_t *): element 0 receives the Time-Stamp Counter,
 * elements 1 and 2 receive counters 0 and 1.
 * The three reads are wrapped in do/while(0) so the macro is a single
 * statement and stays intact under an unbraced if/else or loop.
 * buf is evaluated more than once.
 */
#define PMC_ASM_READ_ALL_ARRAY(buf) \
  do { \
    PMC_ASM_READ_TSC((buf)[0]); \
    PMC_ASM_READ_PMC(0,(buf)[1]); \
    PMC_ASM_READ_PMC(1,(buf)[2]); \
  } while (0)

/* buf is (pmc_data_t *): cycle receives the Time-Stamp Counter,
 * event[0] and event[1] receive counters 0 and 1.
 * Same single-statement wrapping as above; buf is evaluated more than once.
 */
#define PMC_ASM_READ_ALL_DATA(buf) \
  do { \
    PMC_ASM_READ_TSC((buf)->cycle); \
    PMC_ASM_READ_PMC(0,(buf)->event[0]); \
    PMC_ASM_READ_PMC(1,(buf)->event[1]); \
  } while (0)

#else
  /* not kernel mode, safe to use rdpmc */

/* Writes three 64-bit values through buf: the Time-Stamp Counter at byte
 * offset 0, counter 0 at offset 8 and counter 1 at offset 16.
 * %ecx selects the counter for rdpmc: zeroed for the first, incremented
 * for the second.
 * The stores go through the pointer, so "memory" is declared clobbered to
 * stop the compiler from assuming the destination is unchanged.
 */
#define PMC_ASM_READ_ALL_ARRAY(buf) __asm__ __volatile__ ( \
  "rdtsc\n\t" \
  "movl %%eax,0(%0)\n\t" \
  "movl %%edx,4(%0)\n\t" \
  "xorl %%ecx,%%ecx\n\t" \
  "rdpmc\n\t" \
  "movl %%eax,8(%0)\n\t" \
  "movl %%edx,12(%0)\n\t" \
  "incl %%ecx\n\t" \
  "rdpmc\n\t" \
  "movl %%eax,16(%0)\n\t" \
  "movl %%edx,20(%0)" \
  : : "r" (buf) : "eax", "ecx", "edx", "memory" )

/* NOTE(review): reusing the array form assumes pmc_data_t starts with the
 * 64-bit cycle field immediately followed by the event array -- confirm
 * against the pmc_data_t definition.
 */
#define PMC_ASM_READ_ALL_DATA(buf) PMC_ASM_READ_ALL_ARRAY(buf)

#endif	/* PMC_READ_KERNEL_MODE */
#endif	/* PMC_P5 */

#if defined(PMC_P6)
/* Writes three 64-bit values through buf: the Time-Stamp Counter at byte
 * offset 0, counter 0 at offset 8 and counter 1 at offset 16.
 * %ecx selects the counter for rdpmc: zeroed for the first, incremented
 * for the second.  After each rdpmc, "andl $255,%%edx" keeps only the low
 * 8 bits of the high word before it is stored.
 * The stores go through the pointer, so "memory" is declared clobbered to
 * stop the compiler from assuming the destination is unchanged.
 */
#define PMC_ASM_READ_ALL_ARRAY(buf) __asm__ __volatile__ ( \
  "rdtsc\n\t" \
  "movl %%eax,0(%0)\n\t" \
  "movl %%edx,4(%0)\n\t" \
  "xorl %%ecx,%%ecx\n\t" \
  "rdpmc\n\t" \
  "andl $255,%%edx\n\t" \
  "movl %%eax,8(%0)\n\t" \
  "movl %%edx,12(%0)\n\t" \
  "incl %%ecx\n\t" \
  "rdpmc\n\t" \
  "andl $255,%%edx\n\t" \
  "movl %%eax,16(%0)\n\t" \
  "movl %%edx,20(%0)" \
  : : "r" (buf) : "eax", "ecx", "edx", "memory" )

/* NOTE(review): reusing the array form assumes pmc_data_t has the same
 * layout as an array of 64-bit values (cycle first, then the events) --
 * confirm against the pmc_data_t definition.
 */
#define PMC_ASM_READ_ALL_DATA(buf) PMC_ASM_READ_ALL_ARRAY(buf)
#endif	/* PMC_P6 */

#if defined(PMC_P15)
/* begin unfinished */
/* Writes three 64-bit values through buf: the Time-Stamp Counter at byte
 * offset 0, then the results of rdpmc with %ecx = 0 and %ecx = 1 at
 * offsets 8 and 16.  No masking of the high word is done here.
 * The stores go through the pointer, so "memory" is declared clobbered to
 * stop the compiler from assuming the destination is unchanged.
 */
#define PMC_ASM_READ_ALL_ARRAY(buf) __asm__ __volatile__ ( \
  "rdtsc\n\t" \
  "movl %%eax,0(%0)\n\t" \
  "movl %%edx,4(%0)\n\t" \
  "xorl %%ecx,%%ecx\n\t" \
  "rdpmc\n\t" \
  "movl %%eax,8(%0)\n\t" \
  "movl %%edx,12(%0)\n\t" \
  "incl %%ecx\n\t" \
  "rdpmc\n\t" \
  "movl %%eax,16(%0)\n\t" \
  "movl %%edx,20(%0)" \
  : : "r" (buf) : "eax", "ecx", "edx", "memory" )

/* NOTE(review): reusing the array form assumes pmc_data_t has the same
 * layout as an array of 64-bit values (cycle first, then the events) --
 * confirm against the pmc_data_t definition.
 */
#define PMC_ASM_READ_ALL_DATA(buf) PMC_ASM_READ_ALL_ARRAY(buf)
/* end unfinished */
#endif	/* PMC_P15 */

#if defined(PMC_K7)
/* Writes five 64-bit values through buf: the Time-Stamp Counter at byte
 * offset 0, then counters 0 through 3 at offsets 8, 16, 24 and 32.
 * %ecx selects the counter for rdpmc: zeroed for the first read and
 * incremented before each later one.
 * The stores go through the pointer, so "memory" is declared clobbered to
 * stop the compiler from assuming the destination is unchanged.
 */
#define PMC_ASM_READ_ALL_ARRAY(buf) __asm__ __volatile__ ( \
  "rdtsc\n\t" \
  "movl %%eax,0(%0)\n\t" \
  "movl %%edx,4(%0)\n\t" \
  "xorl %%ecx,%%ecx\n\t" \
  "rdpmc\n\t" \
  "movl %%eax,8(%0)\n\t" \
  "movl %%edx,12(%0)\n\t" \
  "incl %%ecx\n\t" \
  "rdpmc\n\t" \
  "movl %%eax,16(%0)\n\t" \
  "movl %%edx,20(%0)\n\t" \
  "incl %%ecx\n\t" \
  "rdpmc\n\t" \
  "movl %%eax,24(%0)\n\t" \
  "movl %%edx,28(%0)\n\t" \
  "incl %%ecx\n\t" \
  "rdpmc\n\t" \
  "movl %%eax,32(%0)\n\t" \
  "movl %%edx,36(%0)" \
  : : "r" (buf) : "eax", "ecx", "edx", "memory" )

/* NOTE(review): reusing the array form assumes pmc_data_t has the same
 * layout as an array of 64-bit values (cycle first, then the events) --
 * confirm against the pmc_data_t definition.
 */
#define PMC_ASM_READ_ALL_DATA(buf) PMC_ASM_READ_ALL_ARRAY(buf)
#endif	/* PMC_K7 */

/*----------------------------------------------------------------------------*/

#endif	/* PMC_ASM_H */
