#ifndef PMC_LIB_H
#define PMC_LIB_H

/*----------------------------------------------------------------------------*/

/*
 * Performance-Monitoring Counters Library, for Intel/AMD Processors and Linux
 * Author:  Don Heller, dheller@scl.ameslab.gov
 * Last revised:  5 October 2001
 *
 * This software is derived from mperfmon (pperf) and its associated library,
 * by M. Patrick Goda and Michael S. Warren, Los Alamos National Laboratory.
 * Their README file, including a disclaimer, and the Ames Laboratory codicil,
 * are considered part of the software distribution.
 */

/*----------------------------------------------------------------------------*/

	/* public interface to the PMC library */

/*----------------------------------------------------------------------------*/

/*
 * See pmc_options.h for a description of the compile-time options.
 *
 * Overview of the PMC library
 *
 *   number of event counters
 *	pmc_event_counters	[a #define'd constant]
 *
 *   hardware cycle and event counters, system interface
 *	pmc_uint32_t		[a long register]
 *	pmc_uint64_t		[a long long register]
 *	pmc_selector_t		[control information]
 *
 *   hardware cycle and event counters, user interface
 *	pmc_cycle_t		[an unsigned integer type]
 *	pmc_event_t
 *	pmc_second()		[convert pmc_cycle_t]
 *	effective size of counters, bits
 *	   pmc_cycle_bits
 *	   pmc_event_bits
 *	pmc_data_t		[a consistent set of counter values]
 *	   pmc_read()		[mark a moment in time]
 *	   pmc_cycle()		[extract one component]
 *	   pmc_event()
 *	   pmc_read_clock()	[pmc_read() without the event counters]
 *	pmc_event_valid()	[self-explanatory]
 *	pmc_event_name()	[character string]
 *	pmc_event_description()	[character string]
 *
 *   cycle and event accumulators
 *	pmc_intervals_t		[an unsigned integer type]
 *	pmc_cycles_t
 *	pmc_events_t
 *	pmc_seconds()		[convert pmc_cycles_t]
 *	effective size of accumulators, bits
 *	   pmc_cycles_bits
 *	   pmc_events_bits
 *	pmc_counter_t		[selector and accumulators]
 *	   pmc_counters_t	[bundle of counters, integer]
 *	   pmc_stats_t		[bundle of counters, double]
 *	   pmc_rates_t		[bundle of event counter per-cycle rates]
 *	   pmc_counter_init()	[compute the selector]
 *	   pmc_counter_reset()	[set the count to 0]
 *	   pmc_counter_print()	[print the accumulated intervals]
 *	   pmc_select()		[give control information to the hardware]
 *				[affected by the -Clean option]
 *	   pmc_accumulate()	[compute an interval between two moments]
 *				[affected by the -Stats option]
 *	   pmc_select_clock()	[for use with pmc_read_clock()]
 *	   pmc_accumulate_clock()
 *	   pmc_accumulate_counter()	[merge data from two counters]
 *	access functions
 *	   all intervals		[maintained always]
 *	     pmc_intervals()		[number of accumulated intervals]
 *	     pmc_nonzero_intervals()	[where the cycle count was nonzero]
 *	     pmc_ratio_intervals()	[where the second event count was nonzero]
 *	     pmc_sum_cycles()
 *	     pmc_sum_events()
 *
 *	   all intervals, occurrences	[maintained if -Stats [1|2]]
 *	     pmc_min_cycles()
 *	     pmc_min_events()
 *	     pmc_max_cycles()
 *	     pmc_max_events()
 *	     pmc_mean_cycles()
 *	     pmc_mean_events()
 *	     pmc_variance_cycles()
 *	     pmc_variance_events()
 *
 *	   all nonzero intervals, rates	[maintained if -Stats [1|2]]
 *	     pmc_min_events_per_cycle()
 *	     pmc_max_events_per_cycle()
 *	     pmc_mean_events_per_cycle()
 *	     pmc_variance_events_per_cycle()
 *
 *	   all valid ratios		[maintained if -Stats 2]
 *	     pmc_min_ratio_events()
 *	     pmc_max_ratio_events()
 *	     pmc_mean_ratio_events()
 *	     pmc_variance_ratio_events()
 *
 *   define the experiment
 *	pmc_event_set_t		[events to be monitored concurrently]
 *	pmc_event_label_t	[textual description of the event set]
 *	pmc_control_t		[set of accumulators, etc.]
 *	pmc_control_null	[initializer]
 *	pmc_control_print()	[print a pmc_control_t]
 *	pmc_getargs()		[read command line]
 *	pmc_start()
 *	pmc_reset()
 *	pmc_print_results()
 *
 *   acquire and release exclusive access to the hardware counters
 *	/dev/pmc
 *	/proc/pmc
 *	pmc_open()
 *	pmc_close()
 *
 *   miscellany
 *	pmc_seconds_per_cycle
 *	alignment of data structures
 *
 *   manipulate the hardware
 *	pmc_configure()
 *
 *   sample a child process
 *	pmc_run_command()
 *	rabbit
 *
 *   sample yourself
 *	pmc_begin_sampling()
 *	pmc_end_sampling()
 */

/*----------------------------------------------------------------------------*/

/*
 * This library uses features that may be unique to GNU CC, in particular
 *	#error
 *	__asm__
 *	__volatile__
 *	__inline__
 *	__attribute__
 *	__alignof__
 * Also, the long long int type is not in ANSI Standard C, but is commonly
 * accepted.
 */

/*----------------------------------------------------------------------------*/

#include <stdio.h>

/*----------------------------------------------------------------------------*/

#include <pmc_events.h>
#include <pmc_asm.h>

/*----------------------------------------------------------------------------*/

/* immutable constants */

/* Boolean constants; a definition already in effect is left untouched. */

#if !defined(TRUE)
#  define TRUE 1
#endif

#if !defined(FALSE)
#  define FALSE 0
#endif

/*----------------------------------------------------------------------------*/

/* mutable constants, derived from the processor */

/* cache line size, bytes
 *   used only for alignment requests
 *
 * compare to the Linux kernel's definition of L1_CACHE_BYTES
 *   derived from the processor type given at kernel configuration
 *   2.0: not defined
 *   2.2: in /usr/include/asm/cache.h
 *   2.4: in /usr/include/asm/cache.h
 *
 * compare to the Linux kernel's definition of SMP_CACHE_BYTES
 *   used in kernel/sched.c and elsewhere for data structure padding
 *   2.0: not defined
 *   2.2: in /usr/include/asm/cache.h
 *   2.4: in /usr/include/linux/cache.h
 *
 * compare to the Linux kernel's definition of CACHE_LINE_SIZE
 *   used in various places but not in an include file
 */

#if !defined(PMC_CACHE_LINE)
	/* PMC_P5 and PMC_P6 builds: 32-byte line */
#  if defined(PMC_P5) || defined(PMC_P6)
#    define PMC_CACHE_LINE 32
#  endif
	/* PMC_P15 and PMC_K7 builds: 64-byte line */
#  if defined(PMC_P15) || defined(PMC_K7)
#    define PMC_CACHE_LINE 64
#  endif
#endif	/* !defined(PMC_CACHE_LINE) */

/*----------------------------------------------------------------------------*/

/* mutable constants, derived from the operating system */

/* maximum sample rate for pmc_run_command() and pmc_begin_sampling()
 * based on the Linux scheduler and interval timer behavior
 *
 * A Linux jiffy = 1/100 sec., the shortest process scheduling time period.
 */

#if !defined(PMC_MAX_RATE)
#  define PMC_MAX_RATE 100	/* samples per second: one per 1/100 sec. jiffy */
#endif

/*----------------------------------------------------------------------------*/

/* mutable constants
 *   easily changed, but everything should be recompiled
 */

/* processor MHz rating */

#ifndef PMC_MHZ
#define PMC_MHZ 800
#endif

/* system bus MHz rating */

#ifndef PMC_BUS_MHZ
#define PMC_BUS_MHZ 133
#endif

/* cycle counter, picoseconds per cycle
 *
 * Integer division: the result is truncated when PMC_MHZ does not divide
 * 1000000 evenly.  PMC_MHZ is parenthesized because it can be overridden
 * (e.g. with -DPMC_MHZ=...) by an expression rather than a plain number;
 * without the parentheses "400 + 400" would expand to 1000000 / 400 + 400.
 */

#ifndef PMC_PICOSEC
#define PMC_PICOSEC (1000000 / (PMC_MHZ))
#endif

/* string length in the pmc_control_t struct */

#ifndef PMC_STRING
#define PMC_STRING 256
#endif

/*----------------------------------------------------------------------------*/

/* Use the inline version of pmc_read()?  [pmc_read_clock() is always inline] */

/* Any of these four options rules out the inline pmc_read(): PMC_READ_INLINE
 * is dropped and PMC_READ_FUNCTION is normalized to a single, empty
 * definition (it is #undef'd first so that an existing definition is not
 * redefined).
 */
#if defined(PMC_READ_FUNCTION) \
 || defined(PMC_READ_KERNEL_MODE) \
 || defined(PMC_LIST) \
 || defined(PMC_VERBOSE)
#undef PMC_READ_INLINE
#undef PMC_READ_FUNCTION
#define PMC_READ_FUNCTION
#endif

/* PMC_READ_KERNEL_MODE takes precedence over PMC_READ_FUNCTION: when it is
 * defined, neither PMC_READ_INLINE nor PMC_READ_FUNCTION remains defined
 * after this point.  This must follow the block above, which would otherwise
 * re-define PMC_READ_FUNCTION.
 */
#if defined(PMC_READ_KERNEL_MODE)
#undef PMC_READ_FUNCTION
#endif

/*----------------------------------------------------------------------------*/

/* exit codes for rabbit
 *
 *    0 no error detected, no program attempted
 *    1 error detected, no program attempted
 *    2 no error detected, but program ran with abnormal exit
 *  126 error detected, could not fork
 *  126 error detected, child process did not run
 *  127 error detected, child process not found
 *  otherwise, exit code from program executed
 *
 * These are intended to be consistent with the GNU time program.
 */

#if !defined(RABBIT_SUCCESS)
#  define RABBIT_SUCCESS	0	/* no error, no program attempted */
#  define RABBIT_FAILURE	1	/* error, no program attempted */
#  define RABBIT_NO_STATUS	2	/* no error, program exited abnormally */
#  define RABBIT_DID_NOT_FORK	126	/* error, could not fork */
#  define RABBIT_DID_NOT_RUN	126	/* error, child process did not run */
#  define RABBIT_DID_NOT_FIND	127	/* error, child process not found */
#endif	/* !defined(RABBIT_SUCCESS) */

/*----------------------------------------------------------------------------*/

/* alignment of data structures to requested boundary
 *
 * Note that this is only a request; it is not guaranteed for local variables
 * on an x86 processor.  Locals are allocated on the runtime stack, and while
 * the compiler can adjust offsets to meet alignment requests, the stack top
 * itself need only be a multiple of 4 (on an x86 processor with GCC).  Global
 * variables and static local variables are indeed aligned as requested.  If
 * the alignment request is made for an array, whose element size is not a
 * multiple of the alignment size, the second element will probably not be
 * aligned properly.
 *
 * compare to the Linux kernel's definition of __cacheline_aligned
 *   2.0: not defined
 *   2.2: in /usr/include/asm/init.h
 *   2.4: in /usr/include/linux/cache.h
 *
 * compare to the Linux kernel's definition of L1_CACHE_ALIGN
 *   2.0: not defined
 *   2.2: in /usr/include/asm/cache.h
 *   2.4: in /usr/include/linux/cache.h
 */

/* PMC_ALIGN(x): request x-byte alignment for the declaration it follows.
 * Expands to nothing when PMC_UNALIGNED is defined.
 */
#if !defined(PMC_UNALIGNED)
#  define PMC_ALIGN(x) __attribute__ (( aligned(x) ))
#else
#  define PMC_ALIGN(x)
#endif	/* !defined(PMC_UNALIGNED) */

/*----------------------------------------------------------------------------*/

/* pmc_uint32_t, pmc_uint64_t
 *
 * hardware cycle and event counters, system interface
 */

/* 32-bit register */

/* Holds one 32-bit hardware register value, aligned to its own size unless
 * PMC_UNALIGNED is defined.
 *
 * NOTE(review): `unsigned long int` is 32 bits wide only where long is
 * 32 bits (e.g. ILP32 x86).  On an LP64 target this typedef would be 64 bits
 * despite its name -- confirm the intended targets before building there.
 */
typedef unsigned long int
  pmc_uint32_t
  PMC_ALIGN(sizeof(unsigned long int));

/* 64-bit register */

/* One 64-bit register value, viewable three ways through the same storage:
 * as a signed integer, as an unsigned integer, or as two pmc_uint32_t halves.
 * Aligned to sizeof(unsigned long long int) unless PMC_UNALIGNED is defined.
 */
typedef union
  {
      signed long long int whole;	/* signed view of the full value */
    unsigned long long int bits64;	/* unsigned view of the full value */
    struct
      {
	pmc_uint32_t low;	/* first half in memory */
	pmc_uint32_t high;	/* second half in memory */
      }
      bits32;	/* NOTE(review): low/high match the value's low/high 32 bits
		 * only on a little-endian target with a 32-bit pmc_uint32_t
		 * -- confirm */
  }
  pmc_uint64_t
  PMC_ALIGN(sizeof(unsigned long long int));

/*----------------------------------------------------------------------------*/

/*
 * Raw data from the hardware counters, unsigned integer types
 *   pmc_cycle_t
 *   pmc_event_t
 *
 * Size of the counters, bits		Intel	AMD
 *   pmc_cycle_bits			  64	64
 *   pmc_event_bits			  40	48
 *
 * Number of event counters		Intel	AMD
 *   pmc_event_counters			   2	 4
 *
 * Struct holding a consistent set of counter values
 *   pmc_data_t
 *
 * Acquisition
 *   void pmc_read(pmc_data_t *);
 *   void pmc_read_clock(pmc_data_t *);
 *
 * Extraction and conversion
 *   pmc_cycle_t pmc_cycle(const pmc_data_t * const);
 *   pmc_event_t pmc_event(const pmc_data_t * const, const int);
 *   double pmc_second(const pmc_cycle_t);
 *
 * Naming convention:
 *   These values represent a moment in time, so cycle and event are singular.
 */

/*----------------------------------------------------------------------------*/

/* pmc_selector_t
 *
 * event counters and control registers
 */

/* these are #define'd in pmc_arch.h
 *   pmc_event_counters
 *   pmc_cycle_bits
 *   pmc_event_bits
 */

/* control information, system interface */

typedef struct
  {
    pmc_uint32_t c[pmc_event_counters];	/* control word, one per event counter */
  }
  pmc_selector_t;

	/* wrapped in a struct so it can be assigned with = and returned by value */

/*----------------------------------------------------------------------------*/

/* pmc_cycle_t, pmc_event_t
 *
 * hardware cycle and event counters, user interface
 */

/* Both user-level counter types share one representation. */
typedef unsigned long long pmc_cycle_t;	/* cycle counter value */
typedef unsigned long long pmc_event_t;	/* event counter value */

/*----------------------------------------------------------------------------*/

/* pmc_data_t
 * 
 * raw data collected by pmc_read() and pmc_read_clock()
 */

/* PMC_FILLER is the number of pmc_uint64_t pad elements that round pmc_data_t
 * up to one cache line.  Assuming an 8-byte pmc_uint64_t:
 *   32-byte line:  32/8 - 1 cycle - 2 events = 1
 *   64-byte line:  64/8 - 1 cycle - 2 events = 5
 *   64-byte line:  64/8 - 1 cycle - 4 events = 3
 * It is a private helper, #undef'd again below.  If none of the processor
 * symbols is defined, PMC_FILLER is left undefined and the typedef below
 * will not compile.
 */
#if defined(PMC_P5) || defined(PMC_P6)
  /* 32-byte cache line, 1 cycle + 2 event counters */
#define PMC_FILLER 1
#endif

#if defined(PMC_P15)
/* begin unfinished */
  /* 64-byte cache line, 1 cycle + 2 event counters */
#define PMC_FILLER 5
/* end unfinished */
#endif

#if defined(PMC_K7)
  /* 64-byte cache line, 1 cycle + 4 event counters */
#define PMC_FILLER 3
#endif

typedef struct
  {
    pmc_uint64_t cycle;				/* cycle counter */
    pmc_uint64_t event[pmc_event_counters];	/* event counters */
    pmc_uint64_t filler[PMC_FILLER];		/* padding only, see below */
  }
  pmc_data_t
  PMC_ALIGN(PMC_CACHE_LINE);

  /*
   * pmc_read() and pmc_read_clock() may be written in assembler,
   *   so don't rearrange the fields.
   *
   * Do not use the filler array for any purpose - it could go away in the
   *   future.
   *
   * The filler brings the struct up to a full 32- or 64-byte cache line.
   *   One alternative is to use a union type instead of the filler array,
   *   but that leads to a lot of ugly code that would be hard to change
   *   later if we decide the filler is not necessary.
   */

#undef PMC_FILLER

/*----------------------------------------------------------------------------*/

/*
 * Accumulated data from the counters, integer types (signed by default,
 * unsigned if PMC_UNSIGNED_ACCUMULATOR is defined)
 *   pmc_intervals_t
 *   pmc_cycles_t
 *   pmc_events_t
 *
 * Size of the accumulators, bits	Intel	AMD
 *   pmc_cycles_bits			  64	64
 *   pmc_events_bits			  64	64
 *
 * Type conversions without error, by widening the integer:
 *   pmc_cycle_t to pmc_cycles_t
 *   pmc_event_t to pmc_events_t
 *
 * Event selection
 *   pmc_selector_t
 *
 * Struct holding all the accumulators, plus selector information
 *   pmc_counter_t
 *
 * Extraction
 *   pmc_intervals_t pmc_intervals(const pmc_counter_t * const);
 *   pmc_intervals_t pmc_nonzero_intervals(const pmc_counter_t * const);
 *
 *   pmc_cycles_t pmc_sum_cycles(const pmc_counter_t * const);
 *   pmc_events_t pmc_sum_events(const pmc_counter_t * const, const int);
 *
 *   pmc_cycles_t pmc_min_cycles(const pmc_counter_t * const);
 *   pmc_events_t pmc_min_events(const pmc_counter_t * const, const int);
 *
 *   pmc_cycles_t pmc_max_cycles(const pmc_counter_t * const);
 *   pmc_events_t pmc_max_events(const pmc_counter_t * const, const int);
 *
 *   double pmc_mean_cycles(const pmc_counter_t * const);
 *   double pmc_mean_events(const pmc_counter_t * const, const int);
 *
 *   double pmc_variance_cycles(const pmc_counter_t * const);
 *   double pmc_variance_events(const pmc_counter_t * const, const int);
 *
 *   double pmc_min_events_per_cycle(const pmc_counter_t * const, const int);
 *   double pmc_max_events_per_cycle(const pmc_counter_t * const, const int);
 *   double pmc_mean_events_per_cycle(const pmc_counter_t * const, const int);
 *   double pmc_variance_events_per_cycle(const pmc_counter_t * const, const int);
 *
 *   pmc_intervals_t pmc_ratio_intervals(const pmc_counter_t * const, const int, const int);
 *   double pmc_min_ratio_events(const pmc_counter_t * const, const int, const int);
 *   double pmc_max_ratio_events(const pmc_counter_t * const, const int, const int);
 *   double pmc_mean_ratio_events(const pmc_counter_t * const, const int, const int);
 *   double pmc_variance_ratio_events(const pmc_counter_t * const, const int, const int);
 *
 * Conversion
 *   double pmc_seconds(const pmc_cycles_t);
 *
 * Initialization of accumulator
 *   void pmc_counter_init(pmc_counter_t *, const pmc_control_t *);
 *	counter = 0;
 *	set the counter's selector field from the indicated events
 *	set the counter's clean field from the control's clean field
 *	set the counter's stats field from the control's stats field
 *   void pmc_counter_reset(pmc_counter_t *);
 *	counter = 0;
 *	but, retain the counter's selector, clean, stats and overhead fields
 *
 * Selection of events [change some hardware registers]
 *   int pmc_select(pmc_counter_t *);
 *	requires previous call to pmc_open()
 *	first use, compute pmc_read() overhead based on counter's clean field
 *	returns TRUE if successful, FALSE otherwise
 *   int pmc_select_clock(pmc_counter_t *);
 *	does not require previous call to pmc_open()
 *	first use, compute pmc_read_clock() overhead based on counter's clean
 *	  field
 *	returns TRUE if successful, FALSE otherwise
 *
 * Arithmetic of counters and accumulators
 *   pmc_cycles_t pmc_accumulate(pmc_counter_t *, pmc_data_t *, pmc_data_t *);
 *	counter += (data1 -= data0);
 *	subtract pmc_read() overhead determined by pmc_select()
 *	return cycles from pmc_open(), or last pmc_start(), to original data1
 *	special treatment for data0 == 0
 *   pmc_cycles_t pmc_accumulate_clock(pmc_counter_t *, pmc_data_t *,
 *		pmc_data_t *);
 *	counter += (data1 -= data0);
 *	subtract pmc_read_clock() overhead determined by pmc_select_clock()
 *	return cycles from pmc_open(), or last pmc_start(), to original data1
 *	special treatment for data0 == 0
 *   void pmc_accumulate_counter(pmc_counter_t *, pmc_counter_t *);
 *	counter0 += counter1;
 *	WARNING!  It is not required that the selector fields agree.
 *
 * Naming convention:
 *   These values represent an interval of time, between two moments,
 *   so cycles and events are plural.
 *   But, considering that pmc_accumulate() changes one of the pmc_data_t
 *   values from a moment to an interval, the convention is not consistent.
 */

/*----------------------------------------------------------------------------*/

/* pmc_intervals_t, pmc_cycles_t, pmc_events_t
 *
 * cycle and event accumulators, user interface
 */

/* Signedness of the accumulator types: signed unless
 * PMC_UNSIGNED_ACCUMULATOR is defined.  The UNSIGNED helper macro stays
 * defined after this point.
 */
#if !defined(PMC_UNSIGNED_ACCUMULATOR)
#  define UNSIGNED
#else
#  define UNSIGNED unsigned
#endif

typedef UNSIGNED long long pmc_intervals_t;	/* number of intervals */
typedef UNSIGNED long long pmc_cycles_t;	/* accumulated cycles */
typedef UNSIGNED long long pmc_events_t;	/* accumulated events */

/* effective size of the accumulators, bits */
#if defined(PMC_K7) || defined(PMC_P15) || defined(PMC_P6) || defined(PMC_P5)
#  define pmc_cycles_bits 64
#  define pmc_events_bits 64
#endif

/*
 * compare to the Linux kernel's definition of cycles_t
 *   2.0: not defined
 *   2.2: in /usr/include/asm/timex.h
 *   2.4: in /usr/include/asm/timex.h
 */

/*----------------------------------------------------------------------------*/

/* bundle of cycle and event counters */

typedef struct
  {
    pmc_cycles_t cycles;			/* cycle count */
    pmc_events_t events[pmc_event_counters];	/* one count per event counter */
  }
  pmc_counters_t;

	/* wrapped in a struct so it can be assigned with = and returned by value */

/*----------------------------------------------------------------------------*/

/* bundle of cycle and event counter statistics */

typedef struct
  {
    double cycles;			/* statistic over the cycle counts */
    double events[pmc_event_counters];	/* same statistic, per event counter */
  }
  pmc_stats_t;

/*----------------------------------------------------------------------------*/

/* bundle of event counter per-cycle rates */

typedef struct
  {
    double rate[pmc_event_counters];	/* events per cycle, per event counter */
  }
  pmc_rates_t;

/*----------------------------------------------------------------------------*/

/* pmc_counter_t
 *
 * processed data collected by pmc_accumulate()
 */

/* One accumulator: the event selection, housekeeping flags, and the
 * statistics gathered over the intervals fed to it.
 *
 * The field order is significant.  The leading fields (selector through
 * overhead) are the ones pmc_counter_reset() preserves; the rest are grouped
 * by the stats level that maintains them.  Members named in lower case
 * (sum, min, max, mean, ssq) hold counts; the capitalized members
 * (Sum, Min, Max, Mean, Ssq) hold per-cycle rates.
 */
typedef struct
  {
    /* event codes */
    pmc_selector_t selector;

    /* housekeeping */
    int clean;		/* clean == 0,+ve -- remove pmc_read() overhead? */
    int stats;		/* stats == 0,1,2 -- level of statistical detail */

    /* pmc_read() overhead estimate, if clean > 0 and pmc_select() is done */
    pmc_counters_t overhead;

    /* up to here, preserved by pmc_counter_reset() */

    /* from here, used with statistics calculations (stats == 0,1,2) */

    /* number of measured intervals */
    pmc_intervals_t intervals;
	/* time intervals */
    pmc_intervals_t nonzero_intervals;
	/* time intervals where the number of cycles is nonzero */
    pmc_intervals_t ratio_intervals[pmc_event_counters][pmc_event_counters];
	/* time intervals where the second number of events is nonzero;
	 * NOTE(review): presumably indexed [i][j] like the *_ratio arrays
	 * below, with event j the "second" one -- confirm in pmc_lib.c */

    /* summation over all time intervals */
    pmc_counters_t sum;

    /* from here, used with statistics calculations (stats == 1,2) */

    /* minimum, maximum */
    pmc_counters_t min, max;

    /* mean,
     * sum of squares of deviations from the mean, used to compute variance
     */
    pmc_stats_t mean, ssq;

    /* event rates, for nonzero intervals */
    pmc_rates_t Sum, Min, Max, Mean, Ssq;

    /* from here, used with ratio calculations (stats == 2) */

    /* ratio, event i / event j */
    double  sum_ratio[pmc_event_counters][pmc_event_counters];
    double  min_ratio[pmc_event_counters][pmc_event_counters];
    double  max_ratio[pmc_event_counters][pmc_event_counters];
    double mean_ratio[pmc_event_counters][pmc_event_counters];
    double  ssq_ratio[pmc_event_counters][pmc_event_counters];
  }
  pmc_counter_t
  PMC_ALIGN(PMC_CACHE_LINE);

/*----------------------------------------------------------------------------*/

/* pmc_event_set_t, pmc_event_label_t
 *
 * events to be monitored concurrently
 */

/* event codes, one slot per event counter */
typedef int pmc_event_set_t[pmc_event_counters];

/* textual description of an event set, PMC_STRING chars of storage */
typedef char pmc_event_label_t[PMC_STRING];

/*----------------------------------------------------------------------------*/

/* pmc_control_t
 *
 * control information for the Intel Pentium processor family
 * AMD Athlon is like a P6
 */

/* changes to this typedef require changes to
 *   pmc_control.c
 *     pmc_control_null
 *     pmc_control_print()
 *   pmc_getargs.c
 *     pmc_getargs()
 *   pmc_job.c
 *   pmc_lib.c
 * and probably elsewhere
 */

/* Description of one experiment: per-counter event selection, sampling and
 * reporting options, and pointers to the event sets, labels and counters.
 * Each per-counter array has one entry per hardware event counter.
 */
typedef struct
  {
    /* information to construct the pmc_counter_t control field */
    int event   [pmc_event_counters];	/* event code */
    int duration[pmc_event_counters];	/* count events (0) or clocks (1) */
    int user    [pmc_event_counters];	/* enable for CPL = [1,2,]3 */
    int os      [pmc_event_counters];	/* enable for CPL = 0[,1,2] */
    int pc      [pmc_event_counters];	/* pin control */
#if defined(PMC_P6) || defined(PMC_K7)
    int mesi    [pmc_event_counters];	/* unit mask, L2 MESI protocol */
    int bus     [pmc_event_counters];	/* unit mask, external bus logic */
    int mmx     [pmc_event_counters];	/* unit mask, MMX instructions */
    int compare [pmc_event_counters];	/* counter comparison */
    int invert  [pmc_event_counters];	/* comparison inversion flag */
    int apic    [pmc_event_counters];	/* APIC flag */
    int enable  [pmc_event_counters];	/* enable */
#endif	/* PMC_P6, PMC_K7 */
#if defined(PMC_P15)
/* begin unfinished */
    /* currently the same members as the PMC_P6 / PMC_K7 group above */
    int mesi    [pmc_event_counters];	/* unit mask, L2 MESI protocol */
    int bus     [pmc_event_counters];	/* unit mask, external bus logic */
    int mmx     [pmc_event_counters];	/* unit mask, MMX instructions */
    int compare [pmc_event_counters];	/* counter comparison */
    int invert  [pmc_event_counters];	/* comparison inversion flag */
    int apic    [pmc_event_counters];	/* APIC flag */
    int enable  [pmc_event_counters];	/* enable */
/* end unfinished */
#endif	/* PMC_P15 */
    /* label for the event set */
    pmc_event_label_t label;	/* descriptive text */
    /* clock rate */
    int mhz;			/* processor, Mcycles per second */
    /* sampling and reporting options */
    int sample_rate;		/* samples per second */
    int flush_rate;		/* samples per fflush() */
    int rotate_rate;		/* samples per rotation of events */
    int clean;			/* from -Clean n */
    int stats;			/* from -Stats [0|1|2] */
    int raw;			/* from -Raw [0|1|2] */
    int trim;			/* from -trim n */
    /* input and output files */
    int file_input;		/* flag from -input */
    int file_output;		/* flag from -output */
    char input_file[PMC_STRING];
    char output_directory[PMC_STRING];
    /* event sets and their accumulators
     * NOTE(review): presumably these three point to arrays of num_counters
     * elements managed by pmc_control_alloc() / pmc_control_free() --
     * confirm in pmc_lib.c
     */
    int event_pairs;		/* number of event pairs */
    int replication;		/* replication factor for event pairs */
    int num_counters;		/* event_pairs * replication */
    pmc_event_set_t * events;	/* event pairs */
    pmc_event_label_t * labels;	/* event pair descriptions */
    pmc_counter_t * counters;	/* event counters */
  }
  pmc_control_t;

/*----------------------------------------------------------------------------*/

/* global variables */

/* initializer for a pmc_control_t; defined in pmc_control.c */
extern const pmc_control_t pmc_control_null;

/* seconds per cycle of the cycle counter (not the processor or system bus);
 * defined in pmc_getargs.c
 */
extern double pmc_seconds_per_cycle;

/*----------------------------------------------------------------------------*/

/* defined in pmc_control.c */

/* print a pmc_control_t to outfile */
void pmc_control_print(FILE * outfile,
		       const char * const description,
		       const pmc_control_t * const ctl);

/*----------------------------------------------------------------------------*/

/* defined in pmc_events.c */

/* event queries, each taking an event code and a counter;
 * the name and description are returned as character strings
 */
int          pmc_event_valid      (const int code, const int counter);
const char * pmc_event_name       (const int code, const int counter);
const char * pmc_event_description(const int code, const int counter);

/*----------------------------------------------------------------------------*/

/* defined in pmc_getargs.c */

/* read the command line into *ctl;
 * argc and argv are passed by address
 */
int pmc_getargs(FILE * errfile,
		const char * prog,
		int * argc,
		char ** argv[],
		pmc_control_t * ctl);

/*----------------------------------------------------------------------------*/

/* defined in pmc_job.c */

/* sample a child process */
void pmc_run_command(const int argc, char * argv[], pmc_control_t * ctl);

/* sample yourself: bracket the region of interest with this pair */
void pmc_begin_sampling(const int argc, char * argv[], pmc_control_t * ctl);
void pmc_end_sampling(const int argc, char * argv[], pmc_control_t * ctl);

/* print the results; ctl is not modified */
int pmc_print_results(const int argc, char * argv[], const pmc_control_t * ctl);

/*----------------------------------------------------------------------------*/

/* defined in pmc_job.c */

/* pointer to a signal handler: takes an int, returns nothing */
typedef void (*pmc_sighandler_t)(int);

/* both take an array number[] together with a count n */
void pmc_catch_signals
  (const int n, const int number[], pmc_sighandler_t handler);
void pmc_restore_signals
  (const int n, const int number[]);

/*----------------------------------------------------------------------------*/

/* defined in pmc_lib.c */

/* allocation and release, in matching pairs */

int pmc_control_alloc
  (const int events, const int repl, pmc_control_t * ctl);
void pmc_control_free
  (pmc_control_t * ctl);

pmc_data_t * pmc_data_alloc
  (const int size);
void pmc_data_free
  (pmc_data_t * ticks);

pmc_counter_t * pmc_counter_alloc
  (const int size);
void pmc_counter_free
  (pmc_counter_t * counter);

/* print the accumulated intervals of a counter */
void pmc_counter_print(FILE * outfile,
		       const char * const description,
		       const pmc_counter_t * const counter);

/*----------------------------------------------------------------------------*/

/* defined in pmc_lib.c */

/* acquire and release exclusive access to the hardware counters */
int pmc_open
  (const int save);
void pmc_close
  (void);

/* pmc_accumulate() reports cycles since pmc_open() or the last pmc_start() */
void pmc_start
  (void);

/* pmc_counter_init():  zero the counter, then take its selector, clean and
 *   stats fields from ctl
 * pmc_counter_reset(): zero the counter but retain its selector, clean,
 *   stats and overhead fields
 */
void pmc_counter_init
  (pmc_counter_t * counter, const pmc_control_t * ctl);
void pmc_counter_reset
  (pmc_counter_t * counter);

/* pmc_select():       requires a previous pmc_open()
 * pmc_select_clock(): does not require a previous pmc_open()
 * both return TRUE if successful, FALSE otherwise
 */
int pmc_reset
  (void);
int pmc_select
  (pmc_counter_t * counter);
int pmc_select_clock
  (pmc_counter_t * counter);

/* counter += (t1 -= t0), less the read overhead found by the matching
 * select call; returns cycles from pmc_open(), or the last pmc_start(),
 * to the original t1
 */
pmc_cycles_t pmc_accumulate(pmc_counter_t * counter,
			    pmc_data_t * t1,
			    pmc_data_t * t0);
pmc_cycles_t pmc_accumulate_clock(pmc_counter_t * counter,
				  pmc_data_t * t1,
				  pmc_data_t * t0);

/* merge the data of two counters; the selector fields need not agree
 * NOTE(review): the overview comment describes this as
 * "counter0 += counter1" while the first parameter is named counter1 --
 * confirm in pmc_lib.c which argument is the destination
 */
void pmc_accumulate_counter(pmc_counter_t * counter1,
			    pmc_counter_t * counter0);

/*----------------------------------------------------------------------------*/

/* defined in pmc_lib.c */

/* manipulate the hardware: apply one of the actions below to one of the
 * features below
 */
int pmc_configure
  (const int feature, const int action);

/* values for the feature argument */

#define PMC_CONFIGURE_CACHE                       0
#define PMC_CONFIGURE_SYSTEM_ALIGNMENT_CHECKING   1
#define PMC_CONFIGURE_PROCESS_ALIGNMENT_CHECKING  2

/* values for the action argument */

#define PMC_CONFIGURE_QUERY  0
#define PMC_CONFIGURE_OFF    1
#define PMC_CONFIGURE_ON     2
#define PMC_CONFIGURE_CLEAR  3
#define PMC_CONFIGURE_SET    4
#define PMC_CONFIGURE_FLUSH  5

/*----------------------------------------------------------------------------*/

/* access functions for pmc_data_t, defined in pmc_lib.c */

/* extract one component from a pmc_data_t */
pmc_cycle_t pmc_cycle
  (const pmc_data_t * const a);
pmc_event_t pmc_event
  (const pmc_data_t * const a, const int i);

/* convert a pmc_cycle_t */
double pmc_second
  (const pmc_cycle_t c);

/*----------------------------------------------------------------------------*/

/* access functions for pmc_counter_t, defined in pmc_lib.c */

/* all intervals: interval counts and sums */

pmc_intervals_t pmc_intervals        (const pmc_counter_t * const a);
pmc_intervals_t pmc_nonzero_intervals(const pmc_counter_t * const a);

pmc_cycles_t    pmc_sum_cycles       (const pmc_counter_t * const a);
pmc_events_t    pmc_sum_events       (const pmc_counter_t * const a, const int i);

/* all intervals: minimum, maximum, mean and variance of the counts */

pmc_cycles_t    pmc_min_cycles       (const pmc_counter_t * const a);
pmc_events_t    pmc_min_events       (const pmc_counter_t * const a, const int i);

pmc_cycles_t    pmc_max_cycles       (const pmc_counter_t * const a);
pmc_events_t    pmc_max_events       (const pmc_counter_t * const a, const int i);

double          pmc_mean_cycles      (const pmc_counter_t * const a);
double          pmc_mean_events      (const pmc_counter_t * const a, const int i);

double          pmc_variance_cycles  (const pmc_counter_t * const a);
double          pmc_variance_events  (const pmc_counter_t * const a, const int i);

/* nonzero intervals: events-per-cycle rates */

double pmc_min_events_per_cycle
  (const pmc_counter_t * const a, const int i);
double pmc_max_events_per_cycle
  (const pmc_counter_t * const a, const int i);
double pmc_mean_events_per_cycle
  (const pmc_counter_t * const a, const int i);
double pmc_variance_events_per_cycle
  (const pmc_counter_t * const a, const int i);

/* valid ratios between two event counts */

pmc_intervals_t pmc_ratio_intervals
  (const pmc_counter_t * const a, const int i, const int j);
double pmc_min_ratio_events
  (const pmc_counter_t * const a, const int i, const int j);
double pmc_max_ratio_events
  (const pmc_counter_t * const a, const int i, const int j);
double pmc_mean_ratio_events
  (const pmc_counter_t * const a, const int i, const int j);
double pmc_variance_ratio_events
  (const pmc_counter_t * const a, const int i, const int j);

/* convert a pmc_cycles_t */

double pmc_seconds
  (const pmc_cycles_t c);

/*----------------------------------------------------------------------------*/

/* Acquire data from performance-monitoring counters.
 *
 * The version defined in pmc_lib.c enables some additional options.
 * The options PMC_VERBOSE and PMC_LIST are not implemented here,
 * since they require access to pmc_verbose and pmc_stderr, which are kept
 * private to the library.
 * The option PMC_READ_KERNEL_MODE is not implemented here, since it requires
 * access to /dev/pmc, and is required with the original Pentium, which does
 * not implement the rdpmc instruction.
 * Thus, any of these options will override PMC_READ_INLINE.
 *
 * Some alternative methods that were considered but rejected include:
 *   pmc_read_start() : suppress interrupts then read
 *   pmc_read_stop()  : read then reenable interrupts
 *      On the IA-32 processors, this requires cli/sti instructions,
 *      which are privileged.
 *   pmc_read_subset() : obtain fewer than all of the event counters.
 *      Inspection of too many run-time options is counter-productive :-).
 *      Compare to pmc_read_clock() which obtains only the cycle counter.
 */

/* public interface */

/* Important!
 * If the library is compiled with -DPMC_READ_INLINE then the user code
 * must also be compiled with -DPMC_READ_INLINE.
 * The easy way to do this is
 *	gcc `pmc_options` prog.c -lm -lpmc
 */

/* pmc_read(): mark a moment in time by storing the counter values in *ticks.
 *
 * Without PMC_READ_INLINE only the prototype is visible here and the
 * function comes from pmc_lib.c.  With PMC_READ_INLINE the body below is
 * compiled into every translation unit that includes this header.
 *
 * The PMC_ASM_* macros are not defined in this file (presumably they come
 * from pmc_asm.h); their exact effect cannot be seen from here.
 */
#ifndef PMC_READ_INLINE
void pmc_read(pmc_data_t * ticks);		/* defined in pmc_lib.c */
#else
static __inline__
void pmc_read(pmc_data_t * ticks)
{
  /* assume /dev/pmc is open */

  /* optional: PMC_ASM_SERIALIZE is issued before the counters are read */
#ifdef PMC_READ_SERIAL
  PMC_ASM_SERIALIZE;
#endif

  /* fill in *ticks */
  PMC_ASM_READ_ALL_DATA(ticks);
}
#endif	/* PMC_READ_INLINE */

/* pmc_read_clock(): pmc_read() without the event counters; always inline.
 *
 * The PMC_ASM_* macros are not defined in this file (presumably they come
 * from pmc_asm.h); their exact effect cannot be seen from here.
 */
static __inline__
void pmc_read_clock(pmc_data_t * ticks)
{
  /* optional: PMC_ASM_SERIALIZE is issued before the clock is read */
#ifdef PMC_READ_SERIAL
  PMC_ASM_SERIALIZE;
#endif

  /* fill in the clock part of *ticks */
  PMC_ASM_READ_CLOCK(ticks);
}

/*----------------------------------------------------------------------------*/

#endif	/* PMC_LIB_H */
