/*----------------------------------------------------------------------------*/

/*
 * Performance-Monitoring Counters Library, for Intel/AMD Processors and Linux
 * Author:  Don Heller, dheller@scl.ameslab.gov
 * Last revised:  5 October 2001
 */

/*----------------------------------------------------------------------------*/

     /* See pmc_options.h for a description of the compile-time options. */

/*----------------------------------------------------------------------------*/

/* defined in this file:
 *
 * public interface
 *
 *	(none)
 *
 * private interface
 *
 *	int pmc_verify_datatypes();
 *	int pmc_verify_processor();
 *	int pmc_verify_alignment();
 *	int pmc_guess_mhz();
 *	int pmc_cache_size();	[valid only after pmc_verify_processor()]
 *
 * local to this file
 *
 *	static const int Speed[];
 *	static const int size_Speed;
 *	static const char none[];
 *	static const char * const Intel_Type[];
 *	static const char * const Intel_Family[];
 *	static const char * const Intel_Extended_Family[];
 *	static const char * const Intel_4_Model[];
 *	static const char * const Intel_5_Model[];
 *	static const char * const Intel_6_Model[];
 *	static const char * const Intel_15_Model[];
 *	static const char * const Intel_15_Extended_Model[];
 *	static const char * const AMD_4_Model[];
 *	static const char * const AMD_5_Model[];
 *	static const char * const AMD_6_Model[];
 *	static const char * const Intel_Brand[];
 *	static const int size_Intel_Brand;
 *	static const char * const Intel_Feature[];
 *	static const char * const AMD_Feature[];
 *	static const char * const Cyrix_Feature[];
 *	static const struct { ...  } Intel_Cache[];
 *	static const int size_Intel_Cache;
 *	static const char amd_full[];
 *	static const char amd_nway[];
 *	static const char amd_entries[];
 *	static const char amd_lines[];
 *
 *	static __inline__ pmc_uint32_t read_eflags(void);
 *	static __inline__ void write_eflags(pmc_uint32_t eflags);
 *	static __inline__ void read_cpuid();
 *
 *	static int closest_Speed();
 *
 *	static int L1_i_size, L1_d_size, L2_size, L3_size;
 */

/*----------------------------------------------------------------------------*/

/* for sleep(), usleep() */
#include <unistd.h>

/* for time(), nanosleep() */
#include <time.h>

/* for gettimeofday() */
#include <sys/time.h>
#include <unistd.h>

/* for fabs() */
#include <math.h>

/* for strcmp() */
#include <string.h>

/*----------------------------------------------------------------------------*/

#include <pmc_private.h>
#include <pmc_asm.h>

/*----------------------------------------------------------------------------*/

/*
 * References, Intel Corp.
 *   Some of the older documents have been removed from the Intel web site.
 *   A little ingenuity with a search engine may be required.
 *
 *   "Intel Processor Identification and the CPUID Instruction",
 *   Application Note AP-485, Feb. 2001, order no. 241618-017,
 *   http://developer.intel.com/design/pentiumii/applnots/241618.htm
 *	Note that earlier revisions of this document differ in their
 *	explanations of the features, cache sizes, etc.
 *      revision				newest processor listed
 *        1	  May 1993	(not seen)
 *        2	 Oct. 1993			Pentium 60/66 MHz
 *        3	Sept. 1994	(not seen)
 *        4	 Dec. 1995			Pentium Pro
 *        5	 Nov. 1996	(not seen)	Pentium/MMX
 *        6	March 1997			Pentium II model 3
 *        7	 June 1997 (same as rev. 6?)
 *        8	 Jan. 1998			Pentium II model 5
 *        9	April 1998			Celeron (a PII model 5)
 *	 10      June 1998			Pentium II Xeon
 *	 11      Dec. 1998			Celeron (model 6)
 *	 12      Jan. 1999			Pentium III, III Xeon
 *	 13      Oct. 1999			Pentium III (model 8)
 *	 14	March 2000	(not seen)	Celeron (model 8)
 *	 15	  May 2000			Pentium III Xeon (model A)
 *	 16      Nov. 2000			Pentium 4
 *	 17      Feb. 2001                      Pentium 4 (DAZ)
 *	 18      June 2001			Pentium III (model B)
 *	Revision 2: "Some non-essential information regarding the Pentium
 *	processor is considered Intel confidential and proprietary and has
 *	not been documented in this publication.  This information is
 *	provided in the Supplement to the Pentium Processor User's Manual
 *	and is available with the appropriate non-disclosure agreements in
 *	place.  Contact Intel Corporation for details." (p. 4, about the
 *	feature flags)
 *	Revision 3: All the defined feature flags are now explained.
 *
 *   "Intel Pentium 4 Processor Identification and the CPUID Instruction",
 *   http://developer.intel.com/design/processor/future/manuals/CPUID_Supplement.htm
 *	Some information in this document is not confirmed by AP-485 rev. 16.
 *      revision				newest processor listed
 *	  1      July 2000			Pentium 4
 *
 *   Archival information for "mature" (dead, discontinued) processors
 *      complete list   http://www.intel.com/support/processors/archive.htm
 *	Pentium		http://www.intel.com/support/processors/pentium/
 *	Pentium/MMX	http://www.intel.com/support/processors/pentiummmx/
 *	Pentium Pro	http://www.intel.com/support/processors/pentiumpro/
 *
 *   Intel Architecture Software Developer's Manual,
 *   (The 1999 editions include the Pentium III.)
 *     vol. 1, Basic Architecture, order no. 243190, 1999;
 *     vol. 2, Instruction Set Reference, order no. 243191, 1999;
 *     vol. 3, System Programming Guide, order no. 243192, 1999.
 *     http://www.intel.com/design/pentiumii/manuals/243190.htm
 *     http://www.intel.com/design/pentiumii/manuals/243191.htm
 *     http://www.intel.com/design/pentiumii/manuals/243192.htm
 *	 vol. 1: Pentium Pro microarchitecture description
 *	 vol. 2: cpuid, rdmsr, rdpmc, rdtsc, wrmsr instructions
 *	 vol. 3: Ch. 2, System Architecture Overview;
 *		 Ch. 15, Debugging and Performance Monitoring;
 *		 App. A, Performance-Monitoring Events;
 *		 App. B, Model-Specific Registers
 *   (The 2000-1 editions include the Pentium 4.)
 *     vol. 1, Basic Architecture, order no. 245470, 2001;
 *     vol. 2, Instruction Set Reference, order no. 245471, 2001;
 *     vol. 3, System Programming Guide, order no. 245472, 2001.
 *     http://developer.intel.com/design/pentium4/manuals/245470.htm
 *     http://developer.intel.com/design/pentium4/manuals/245471.htm
 *     http://developer.intel.com/design/pentium4/manuals/245472.htm
 *
 *   Intel Corp., Pentium Processor Family User's Manual
 *     vol. 1, Pentium Processor Family Data Book
 *     vol. 2, 82496/82497 Cache Controller and 82491/82492 Cache SRAM Data Book
 *     vol. 3, Architecture and Programming Manual
 *     1994, order no. 241428, 241429, 241430.
 *	(vol. 3 has the infamous Appendix H)
 *
 *   Intel Corp., Processor Family Developer's Manuals,
 *     Pentium Processor Family Developer's Manual,
 *       vol. 3, Architecture and Programming Manual,
 *       1995, order no. 241430.
 *       http://www.intel.com/design/pentium/manuals/241430.htm
 *
 *     Pentium Processor Family Developer's Manual,
 *       1997, order no. 241428-005.
 *       http://www.intel.com/design/pentium/manuals/241428.htm
 *	 http://developer.intel.com/design/intarch/manuals/241428.htm
 *
 *     Pentium Pro Family Developer's Manual,
 *       vol. 1, Specifications,
 *       1996, order no. 242690-001.
 *       http://www.intel.com/design/pro/manuals/242690.htm
 *
 *     Pentium II Processor Developer's Manual,
 *       Oct. 1997, order no. 243502.
 *       http://www.intel.com/design/pentiumii/manuals/243502.htm
 *
 *   Intel Corp., datasheets
 *     Pentium
 *       http://developer.intel.com/design/pentium/datashts/241997.htm
 *       June 1997, order no. 241997-010
 *     Pentium Processor with MMX[tm] Technology
 *       http://developer.intel.com/design/pentium/datashts/243185.htm
 *       June 1997, order no. 243185-004
 *
 *   Intel Corp., monthly specification updates
 *     60- and 66-MHz Pentium Processor Specification Update
 *       Feb. 1997, order no. 243326-001 (the only version)
 *       http://www.intel.com/design/pentium/specupdt/243326.htm
 *     Pentium Processor Specification Update
 *       Jan. 1999, order no. 242480-041 (final version)
 *       http://www.intel.com/design/pentium/specupdt/242480.htm
 *     Pentium Pro Processor Specification Update
 *       Jan. 1999, order no. 242689-035 (final version)
 *       http://www.intel.com/design/pro/specupdt/242689.htm
 *     Pentium II Processor Specification Update
 *       Dec. 2000, order no. 243337-040
 *       http://www.intel.com/design/pentiumii/specupdt/243337.htm
 *     Pentium II Xeon[tm] Processor Specification Update
 *       Dec. 2000, order no. 243776-027
 *       http://developer.intel.com/design/pentiumii/xeon/specupdt/243776.htm
 *     Mobile Pentium(R) II Processor Specification Update
 *       Dec. 2000, order no. 243887-027
 *       http://developer.intel.com/design/mobile/specupdt/243887.htm
 *     Intel Celeron[tm] Processor Specification Update
 *       Dec. 2000, order no. 243748-031
 *       http://developer.intel.com/design/celeron/specupdt/243748.htm
 *     Mobile Intel(R) Celeron(TM) Processor at 500 MHz, 450 MHz and 400A MHz Specification Update
 *       Dec. 2000, order no. 245421-011
 *       http://developer.intel.com/design/mobile/specupdt/245421.htm
 *     Mobile Intel(R) Celeron[tm] Processor Specification Update
 *       Dec. 2000, order no. 244444-021
 *       http://developer.intel.com/design/mobile/specupdt/244444.htm
 *     Pentium III Processor Specification Update
 *       Dec. 2000, order no. 244453-024
 *       http://developer.intel.com/design/pentiumiii/specupdt/244453.htm
 *     Pentium III Xeon Processor Specification Update
 *       Dec. 2000, order no. 244460-023
 *       http://developer.intel.com/design/pentiumiii/xeon/specupdt/244460.htm
 *     Mobile Pentium(R) III Processor Specification Update
 *       Dec. 2000, order no. 245306-014
 *       http://developer.intel.com/design/mobile/specupdt/245306.htm
 *
 *   Intel Corp., optimization guides
 *     Intel Architecture Optimization Manual
 *       1997, order no. 242816-003
 *       http://developer.intel.com/design/pro/MANUALS/242816.htm
 *     Intel Architecture Optimization Reference Manual
 *       1999, order no. 245127-001
 *       http://www.intel.com/design/pentiumii/manuals/245127.htm
 *     Intel Pentium 4 Processor Optimization Reference Manual
 *       2000, order no. 248966-001
 *       http://developer.intel.com/design/pentium4/manuals/248966.htm
 *
 *   Intel Corp.,
 *     Intel Processor Serial Number
 *       Application Note AP-909, March 1999, order no. 245125-001
 *       http://www.intel.com/design/pentiumiii/applnots/245125.htm
 *
 *   Intel Corp.,
 *     Identifying Support for Streaming SIMD Extensions in the Processor
 *         and Operating System
 *       Jan. 1999, order no. 244413-002
 *       http://developer.intel.com/vtune/cbts/strmsimd/900down.htm
 *
 *   Intel Literature Center:
 *     http://developer.intel.com/design/litcentr/index.htm
 *
 *   "Using the RDTSC Instruction for Performance Monitoring",
 *     1997,
 *     http://developer.intel.com/drg/pentiumII/appnotes/RDTSCPM1.HTM
 *	[a survey of implementation issues, recommended reading,
 *	but there is a consistent serious bug in their examples]
 *
 *   "Survey of Pentium Processor Performance Monitoring Capabilities & Tools",
 *     1996,
 *     http://developer.intel.com/drg/mmx/AppNotes/PERFMON.HTM
 *	[a useful survey when written, but now out-of-date]
 *
 *   "A Quick and Easy Way to Run CPUID at Run-time",
 *     http://developer.intel.com/software/idap/media/pdf/877.htm
 *	[structured exception handling in Microsoft Visual C++]
 *
 *   Intel Processor Frequency ID Utility
 *     http://www.intel.com/support/processors/tools/frequencyid/
 *	[Windows and bootable versions, CPUID and bus/core frequency info;
 *	free]
 *
 *   VTUNE, Visual Tuning Environment, general description
 *     http://developer.intel.com/vtune/analyzer/index.htm
 *	[a useful tool if you run Microsoft operating systems, but there
 *	is also some additional information valid for any system]
 *
 *   P6Perf Utility
 *     http://developer.intel.com/vtune/p6perf/index.htm
 *     [unsupported by Intel]
 *
 *   Intel Technology Journal
 *
 *     3rd Quarter, 1997
 *       http://developer.intel.com/technology/itj/q31997.htm
 *
 *       MMX[tm] Technology Architecture Overview
 *       Millind Mittal, Alex Peleg, Uri Weiser
 *
 *       MMX[tm] Microarchitecture of Pentium Processors With
 *           MMX Technology and Pentium II Microprocessors
 *       Michael Kagan, Simcha Gochman, Doron Orenstien, Derrick Lin
 *
 *     1st Quarter, 1998
 *       http://developer.intel.com/technology/itj/q11998.htm
 *
 *       An Overview of the Intel TFLOPS Supercomputer
 *       Timothy G. Mattson and Greg Henry
 *
 *       The Performance of the Intel TFLOPS Supercomputer
 *       Greg Henry, Pat Fay, Ben Cole, and Timothy G. Mattson
 *
 *     2nd Quarter, 1999
 *	 http://developer.intel.com/technology/itj/q21999.htm
 *
 *	 Pentium III Processor Serial Number Feature and Applications
 *	 Stephen Fischer, James Mi, Albert Teng
 *
 *	 Programming Methods for the Pentium III Processor's Streaming
 *	   SIMD Extensions Using the VTune Performance Enhancement Environment
 *	 Joe H. Wolf III
 *
 *   Pentium 4 (Willamette) Processor Software Developer's Guide (preliminary)
 *     http://developer.intel.com/design/processor/wmtsdg.htm
 *
 *   Microprocessor Quick Reference Guide
 *     http://www.intel.com/pressroom/kits/quickrefyr.htm
 *     http://www.intel.com/pressroom/kits/quickreffam.htm
 *
 * References, not Intel
 *
 *   www.x86.org (Robert Collins, now Dr. Dobb's Journal)
 *     Two articles from Dr. Dobb's, Sep. and Nov. 1996:
 *     http://www.ddj.com/articles/1996/9609/9609o/9609o.htm
 *     http://www.ddj.com/articles/1996/9611/9611n/9611n.htm
 *
 *   www.sandpile.org (Christian Ludloff)
 *     http://www.sandpile.org/arch/cpuid.htm
 *     http://www.sandpile.org/arch/pemo.htm
 *     http://www.sandpile.org/arch/pemo_p5.htm
 *     http://www.sandpile.org/arch/pemo_p6.htm
 *
 *   Grzegorz Mazur, Identification of x86 CPUs with CPUID support
 *     http://grafi.ii.pw.edu.pl/gbm/x86/cpuid.html
 *
 *   Rob Wyatt, "Processor Detection and a Pentium III Update",
 *     Gamasutra, vol. 3, issue 27, July 9, 1999
 *     http://www.gamasutra.com/features/wyatts_world/19990709/processor_detection_01.htm
 *
 * References, Linux kernel source code
 *
 *   include/asm-i386/processor.h
 *   arch/i386/kernel/setup.c
 *		(setup.c has more information on non-Intel processors)
 */

/*
 * References, Advanced Micro Devices
 *   "AMD Processor Recognition Application Note",
 *   publication # 20734, Rev. Q, June 2000.
 *   http://www.amd.com/products/cpg/athlon/techdocs/index.html
 *
 *   AMD Processor Utilities (recognition, speed, etc.)
 *   http://www.amd.com/products/cpg/bin/
 *
 *   "AMD Athlon Processor x86 Code Optimization Guide",
 *   publication # 22007, June 2000.
 *   http://www.amd.com/products/cpg/athlon/techdocs/index.html
 */

/*
 * References, Cyrix Corp.
 *   "Cyrix CPU Detection Guide",
 *   Application Note 112, Rev. 1.9, July 21, 1998
 *   formerly available from http://www.cyrix.com/html/developers/index.htm
 *     when Cyrix was part of National Semiconductor
 *   VIA has no similar document available as of June 2000.
 */

/*----------------------------------------------------------------------------*/

/*
 * Additional notes.
 * 1. rdtsc is architectural, while rdpmc is not.  This mainly reflects Intel's
 *    commitment to support the Time Stamp Counter and the rdtsc instruction in
 *    future processors, and the inherent implementation-dependence of the
 *    Performance-Monitoring Counters.
 * 2. The TSC is set to 0 at system reset.  It presently increments once per
 *    processor cycle.  Only CPL = 0 can modify the TSC, and then only the lower
 *    32 bits can be set, while the upper 32 bits are cleared.  The TSC will
 *    continue to increment in the halt state (HLT instruction).
 *    At system reset, the event counter control registers are set to zero, and
 *    the event counters are undefined.
 *    All the counters are unchanged after an INIT signal.
 * 3. The rdtsc instruction is available with all the Pentium processors.  It
 *    is not serializing.
 *    On the first Pentium (60/66 MHz), rdtsc is 6 clocks at CPL = 0, 11 clocks
 *    at CPL = 1,2,3, according to the first manual, but experiments, which
 *    include sending the results to memory, indicate 28-29 clocks is a better
 *    estimate.
 *    On a Pentium II (450 MHz), experiments show rdtsc using about 33 clocks,
 *    including sending the results to memory.
 *    The Athlon documentation shows rdtsc at 11 clocks.
 * 4. The rdpmc instruction is available with the Pentium/MMX, Pentium Pro
 *    and Pentium II/III processors.  It is not serializing.
 *    Experiments on a Pentium II (450 MHz) show rdpmc to use about 34 clocks,
 *    with the results cleaned and sent to memory.
 *    The Athlon documentation does not give a cycle count for rdmsr, wrmsr
 *    or rdpmc.
 * 5. The Performance-Monitoring Counters are 40 bits wide, but are accessed
 *    as 64 bits with rdpmc or rdmsr.  The upper 24 bits are not specified by
 *    Intel, but in practice on the Pentium they are 0, while on the Pentium
 *    Pro they are the upper 24 bits of the Time Stamp Counter, and must be
 *    cleared before use.  The Pentium writes all 40 bits as presented, while
 *    the Pentium Pro sign-extends the lower 32 bits to 40 bits, which can be
 *    used with the overflow interrupt to generate a countdown mechanism.
 *    The Athlon's counters are 48 bits wide, with Pentium-like behavior.
 * 6. The Pentium 4 processor has a "fast" version of the rdpmc instruction
 *    where only the lower 32 bits are reported; this is used if bit 31 of
 *    the input register ecx is set.
 * 7. The Pentium 4 Performance-Monitoring Counters are completely different.
 *    Rewriting the current code may be difficult :-).
 */

/*----------------------------------------------------------------------------*/

/*
 * Concerning the Intel Pentium III serial number:
 *   Linux can be configured to disable this feature, and we do not attempt to
 *   restore it here.  The Pentium 4 does not include this feature.
 *
 * Concerning the "Denormals are Zero" feature:
 *   See AP-485, since version 17, for a description of how to test for this
 *   feature.  It only applies to the Pentium III and 4.
 *
 * Concerning non-Intel x86 processors:
 *   The Cyrix family can disable the cpuid instruction, if it is present.
 *   The Cyrix M1 processor core does not have the rdtsc instruction.
 *   The Cyrix MII processor implements the Pentium/MMX performance counters,
 *   but 48 bits wide.  National Semiconductor, the owner of Cyrix, announced
 *   they would abandon the PC processor market, to concentrate on embedded
 *   systems (May 5, 1999).  The Cyrix III is available from VIA (Feb. 2000).
 *   reference: Cyrix Corp., Cyrix MII Data Book, order no. 94329, Feb. 1999.
 *
 *   The AMD K6-III processor does not implement the performance counters.
 *   The Time Stamp Counter is available, but without rdtsc.  There are a
 *   number of features that would be of interest for performance studies,
 *   such as the ability to independently disable level 1 or 2 cache, or to
 *   install an external level 3 cache.
 *   reference: Advanced Micro Devices Corp., AMD-K6-III Processor Data Sheet,
 *   order no. 21918, Oct. 1999.
 *
 *   The AMD K7 (Athlon) implements performance counters in the style of the
 *   Intel P6 family.  There are four counters instead of two, but a limited
 *   set of events.  The Athlon is fully supported by this library.
 *
 * Concerning non-x86 processors:
 *   It is our intent to extend this library to various RISC processors
 *   (DEC Alpha, MIPS R10000, IBM Power3, SPARC 3, etc.), but if you are in
 *   a hurry, see the PTools PAPI project.
 */

/*----------------------------------------------------------------------------*/

/* Table of known and probable Intel MHz ratings in the Pentium family,
 *   consulted by pmc_guess_mhz().
 *   The entries must remain sorted in increasing order.
 *   Not accounted for:
 *	overclocking or other manufacturers' P-ratings
 *	power-management techniques that change the MHz
 *   The bus-speed remarks below annotate the row that follows them.
 */

static const int Speed[] =
  {
    /* 25, 33, 50, 60, 66 MHz bus */
      50,   60,   63,   66,   75,   83,   90,
    /* 50, 60, 66 MHz bus */
     100,  120,  125,  133,  150,  166,  180,
     200,  233,  250,  266,
    /* 66, 100 MHz bus */
     300,  333,  350,  366,
     400,  433,  450,  466,
    /* 100, 133 MHz bus */
     500,  533,  550,  566,
    /* Intel refers to 667 instead of 666 */
     600,  633,  650,  666,
     700,  733,  750,  766,
     800,  833,  850,  866,
     900,  933,  950,  966,
    /* 133 MHz bus */
    1000, 1033, 1050, 1066,
    1100, 1133, 1150, 1166,
    1200, 1233, 1250, 1266,
    1300, 1333, 1350, 1366,
    1400, 1433, 1450, 1466,
    1500, 1533, 1550, 1566,
    1600, 1633, 1650, 1666,
    1700, 1733, 1750, 1766,
    1800, 1833, 1850, 1866,
    1900, 1933, 1950, 1966,
    /* 200, 400 MHz bus */
    2000, 2033, 2050, 2066,
    2100, 2133, 2150, 2166,
    2200, 2233, 2250, 2266,
    2300, 2333, 2350, 2366,
    2400, 2433, 2450, 2466,
    2500, 2533, 2550, 2566,
    2600, 2633, 2650, 2666,
    2700, 2733, 2750, 2766,
    2800, 2833, 2850, 2866,
    2900, 2933, 2950, 2966,
    /* speculation */
    3000,
    /* final entry: larger than anything */
    1000000
  };

/* number of entries in Speed[], not counting the final oversized entry */
static const int size_Speed = sizeof(Speed)/sizeof(Speed[0]) - 1;

/*
 * 4004					1971	0.1
 * 8008					1972	0.2
 * 8080					1974	2
 * 8085					1976	5
 * 8086					1978	5,8,10
 * 8088					1979	5,8
 * 286					1982	6,10,12
 * 386					1985-92 16,20,25,33
 * 486					1989-94 16,20,25,33,50,66,75,100
 *
 * Pentium OverDrive for 486		1994	63,83
 * Pentium				1993	60,66 (samples at 50)
 * Pentium OverDrive for above		1996	120,133
 * Pentium				1994-6	75,90,100,120,133,150,166,200
 * Pentium OverDrive for above		1996	125,150,166
 * Pentium/MMX OverDrive for above	?	125,150,166,180,200
 * Pentium/MMX				1997-9	120,133,150,166,200,233,266,300
 * Pentium mobile			?	75,90,100
 * Pentium/MMX mobile			1997-9	166,200,233,266,300
 *
 * Pentium Pro				1995-7	150,166,180,200 (samples at 133)
 * Pentium II OverDrive for PPro	1998	300,333
 * Pentium II				1997-8	233,266,300,333,350,366,400,450
 * Pentium II mobile			1998-9	233,266,300,333,366,400
 * Celeron				1998-9	266,300,333,366,400,433,466,500
 *					2000	533,566,600
 * Celeron mobile			1999	266,300,333,366,400,433,466
 * Pentium II Xeon			1998-9	400,450
 * Pentium III				1999	400,450,500,533,550,600,650,
 *						667,700,733
 *					2000	700,750,800,850,866,933,1000,
 *						1133
 * Pentium III mobile			1999	400,450,500
 *					2000	600,650,700
 * Celeron				2000	?
 * Celeron mobile			2000	450,500,550
 * Pentium III Xeon			1999	500,550,600,667,733
 *					2000	800,866,933
 *
 * Pentium 4				2000-1	1300,1400,1500,1600,1700,1800,1900,
 *						2000
 * Xeon					2001
 *
 * AMD K6				?	166,200,233,266,300
 * AMD K6-2				?	233,266,300,333,366,400
 *						300,350,400,450,500,600
 *						380/95,475/95,533/97
 * AMD K6-III				?	350,400,450,500,550,600
 *						475/95
 * AMD Athlon				1999-
 *					2000	600,650,700,750,800,850,900,950,1000
 *
 * bus	core MHz
 *  25                  63
 *  33                  83
 *  50	 50,  75, 100, 125
 *  60	 60,  90, 120, 150, 180
 *  66	 66, 100, 133, 166, 200, 233, 266, 300, 333, 366, 400, 433, 466
 * 100	                         350, 400, 450, 500, 550, 600, 650, 700, 750
 * 133                                533, 600, 666, 733, 800, 866, 933,1000,1066,1133
 * 200
 * 400
 *  x   1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5
 *
 * Note the 95,97 MHz AMD bus speeds.
 */

/*
 * Some more details for the Pentium Pro,
 *	cpu	host	PCI	ISA
 *		bus	bus	bus
 *	180	60	30	7.5
 *	200	66	33	8.33
 */

/*----------------------------------------------------------------------------*/

/* The pmc_uint64_t type assumes a certain layout in memory, which we check.
 *
 * Three properties are tested, and all three are always evaluated so that
 * every problem found is reported:
 *   1. sizeof(pmc_uint32_t) == 4 and sizeof(pmc_uint64_t) == 8
 *   2. bits32.low and bits32.high overlay the low and high halves of bits64
 *   3. 64-bit addition carries across the 4-byte boundary
 *
 * outfile	stream for the error messages; NULL suppresses them
 * prog		program name, printed as the prefix of each error message
 *
 * return TRUE if the layout is as expected
 * return FALSE otherwise
 *
 * outfile and prog are only used for error messages before returning FALSE
 */

/* private interface */

int pmc_verify_datatypes(FILE * outfile, const char * const prog)
{
  int ret = TRUE;
  pmc_uint64_t a, b, c;

  PMC_VERBOSE_IN(pmc_verify_datatypes)

  /* check the size of the parts */
  /* sizeof yields a size_t, so it is converted explicitly to match %d */

  if (sizeof(pmc_uint32_t) != 4)
    {
      if (outfile != NULL)
	fprintf(outfile,
	  "%s: pmc_verify_datatypes() failure, pmc_uint32_t size == %d != 4\n",
	  prog, (int) sizeof(pmc_uint32_t));
      ret = FALSE;
    }

  if (sizeof(pmc_uint64_t) != 8)
    {
      if (outfile != NULL)
	fprintf(outfile,
	  "%s: pmc_verify_datatypes() failure, pmc_uint64_t size == %d != 8\n",
	  prog, (int) sizeof(pmc_uint64_t));
      ret = FALSE;
    }

  a.bits64 = 0x1234567887654321LL;
  b.bits64 = 0x0000000080000000LL;
  c.bits64 = a.bits64 + b.bits64;	/* 0x1234567907654321LL */

  /* check the values in memory */

  if (a.bits32.low == 0x87654321L && a.bits32.high == 0x12345678L)
    {
      /* the halves are where we expect them, nothing to report */
    }
  else
  if (a.bits32.low == 0x12345678L && a.bits32.high == 0x87654321L)
    {
      /* the two 32-bit halves are intact but swapped */
      if (outfile != NULL)
	fprintf(outfile,
	  "%s: pmc_verify_datatypes() failure, word ordering\n", prog);
      ret = FALSE;
    }
  else
    {
      /* neither arrangement matched */
      if (outfile != NULL)
	{
	  fprintf(outfile,
	    "%s: pmc_verify_datatypes() failure, byte ordering\n", prog);
	  /* the casts make the arguments agree with %llx and %lx
	   * whatever the underlying integer types of the members are
	   */
	  fprintf(outfile,
	    "    0x%016llx is not 0x%08lx:%08lx\n",
	    (unsigned long long) a.bits64,
	    (unsigned long) a.bits32.high,
	    (unsigned long) a.bits32.low);
	}
      ret = FALSE;
    }

  /* check for the carry across the 4-byte boundary */

  if (c.bits64 != 0x1234567907654321LL)
    {
      if (outfile != NULL)
	{
	  fprintf(outfile,
	    "%s: pmc_verify_datatypes() failure, uint64 addition\n", prog);
	  fprintf(outfile,
	    "    0x%016llx is not 0x%016llx + 0x%016llx\n",
	    (unsigned long long) c.bits64,
	    (unsigned long long) a.bits64,
	    (unsigned long long) b.bits64);
	}
      ret = FALSE;
    }

  PMC_VERBOSE_OUT(pmc_verify_datatypes)

  return ret;
}

/*----------------------------------------------------------------------------*/

/* interpretations of cpuid output, from Intel, AMD and sandpile.org */

/* We have only used information for the Intel P5, P6 and NetBurst families,
 * and the AMD Athlon.
 * The cpuid instruction is not available on the 386 and early models
 * of the 486, and there's no point in coding the details for old parts
 * that don't have the Time Stamp Counter anyway.
 * Some of the parts in the P5 series misidentify themselves, and the
 * classification system is not implemented consistently by Intel for
 * any of the processor families.
 */

/* placeholder string used by the tables below for entries without a name */
static const char none[] = "?";

/* names for the four processor type codes */
static const char * const Intel_Type[4] =
  {
    /* 0x0 */	"Original OEM (primary)",
    /* 0x1 */	"OverDrive",
    /* 0x2 */	"Dual (secondary)",
    /* 0x3 */	"reserved"
  };

/* names for the sixteen family codes; unnamed codes use the placeholder */
static const char * const Intel_Family[16] =
  {
    /* 0x0 */	none,
    /* 0x1 */	none,
    /* 0x2 */	none,
    /* 0x3 */	"P3",
    /* 0x4 */	"P4",
    /* 0x5 */	"P5",
    /* 0x6 */	"P6",
    /* 0x7 */	none,
    /* 0x8 */	none,
    /* 0x9 */	none,
    /* 0xA */	none,
    /* 0xB */	none,
    /* 0xC */	none,
    /* 0xD */	none,
    /* 0xE */	none,
    /* 0xF */	none	/* use the extended family */
  };

/* names for the extended family codes;
 * consult this table only when the Intel_Family field is 15
 */
static const char * const Intel_Extended_Family[16] =
  {
    /* 0x0 */	"Pentium 4",
    /* 0x1 */	none,
    /* 0x2 */	none,
    /* 0x3 */	none,
    /* 0x4 */	none,
    /* 0x5 */	none,
    /* 0x6 */	none,
    /* 0x7 */	none,
    /* 0x8 */	none,
    /* 0x9 */	none,
    /* 0xA */	none,
    /* 0xB */	none,
    /* 0xC */	none,
    /* 0xD */	none,
    /* 0xE */	none,
    /* 0xF */	none
  };

/* Model names for the "4" family table.
 * In some cases the model field indicates only one of several possibilities.
 */
static const char * const Intel_4_Model[16] =
  {
    /* 0x0 */	"i80486DX (25,33 MHz)",
    /* 0x1 */	"i80486DX (50 MHz)",
    /* 0x2 */	"i80486SX",
    /* 0x3 */	"i80486DX2",
    /* 0x4 */	"i80486SL",
    /* 0x5 */	"i80486SX2",
    /* 0x6 */	none,
    /* 0x7 */	"i80486DX2 (write-back enhanced)",
    /* 0x8 */	"i80486DX4",
    /* 0x9 */	"i80486DX4 (write-back enhanced)",
    /* 0xA */	none,
    /* 0xB */	none,
    /* 0xC */	none,
    /* 0xD */	none,
    /* 0xE */	none,
    /* 0xF */	none
  };

/* model names for the "5" family table */
static const char * const Intel_5_Model[16] =
  {
    /* 0x0 */	"Pentium (P5, 60,66 MHz, A-step)",
    /* 0x1 */	"Pentium (P5, 60,66 MHz)",
    /* 0x2 */	"Pentium (P54C, 75-200 MHz)",
    /* 0x3 */	"Pentium (P24T OverDrive, 486 replacement)",
		/* the 486 parts meant here: SX,DX,SX2,DX2 */
    /* 0x4 */	"Pentium/MMX (P55C)",
    /* 0x5 */	"Pentium (OverDrive, 486/DX4 replacement)",
    /* 0x6 */	"Pentium (OverDrive)",
    /* 0x7 */	"Pentium (P54C, 75-200 MHz)",
    /* 0x8 */	"Pentium/MMX (P55C, 0.25 micron, mobile)",
    /* 0x9 */	none,
    /* 0xA */	none,
    /* 0xB */	none,
    /* 0xC */	none,
    /* 0xD */	none,
    /* 0xE */	none,
    /* 0xF */	none
  };

/* model names for the "6" family table; trailing remarks are annotations */
static const char * const Intel_6_Model[16] =
  {
    /* 0x0 */	"Pentium Pro (A-step)",
    /* 0x1 */	"Pentium Pro",
    /* 0x2 */	none,
    /* 0x3 */	"Pentium II (model 3)",	/* Klamath */
    /* 0x4 */	none,
    /* 0x5 */	"Pentium II (model 5)",	/* Deschutes */
    /* 0x6 */	"Pentium II (model 6)",	/* mobile */
    /* 0x7 */	"Pentium III (model 7)",	/* Katmai */
    /* 0x8 */	"Pentium III (model 8)",	/* Coppermine */
    /* 0x9 */	none,
    /* 0xA */	"Pentium III (model A)",
    /* 0xB */	"Pentium III (model B)",
    /* 0xC - 0xE */
		none, none, none,
    /* 0xF */	none	/* use the extended model */
  };

/* model names for the "15" family table; only model 0 is named so far */
static const char * const Intel_15_Model[16] =
  {
    /* 0x0 */
	"Pentium 4",
    /* 0x1 - 0x7 */
	none, none, none, none, none, none, none,
    /* 0x8 - 0xE */
	none, none, none, none, none, none, none,
    /* 0xF */
	none	/* use the extended model */
  };

/* extended model names; consult this table only when the family field
 * is 15 and the model field is 15; no entry is named so far
 */
static const char * const Intel_15_Extended_Model[16] =
  {
    /* 0x0 - 0x7 */
	none, none, none, none, none, none, none, none,
    /* 0x8 - 0xF */
	none, none, none, none, none, none, none, none
  };

/* AMD model names for the "4" family table */
static const char * const AMD_4_Model[16] =
  {
    /* 0x0 */	none,
    /* 0x1 */	none,
    /* 0x2 */	none,
    /* 0x3 */	"Am486DX2",	/* no cpuid */
    /* 0x4 */	none,
    /* 0x5 */	none,
    /* 0x6 */	none,
    /* 0x7 */	"Am486DX2WB",	/* no cpuid */
    /* 0x8 */	"Am486DX4",
    /* 0x9 */	"Am486DX4WB",
    /* 0xA */	none,
    /* 0xB */	none,
    /* 0xC */	none,
    /* 0xD */	none,
    /* 0xE */	"Am5x86",
    /* 0xF */	"Am5x86WB"
  };

/* AMD model names for the "5" family table */
static const char * const AMD_5_Model[16] =
  {
    /* 0x0 */	"K5, PR 75, 90, 100",	/* no extended cpuid */
    /* 0x1 */	"K5, PR 120, 133",
    /* 0x2 */	"K5, PR 166",
    /* 0x3 */	"K5, PR 200",
    /* 0x4 */	none,
    /* 0x5 */	none,
    /* 0x6 */	"K6, 166/233",
    /* 0x7 */	"K6, 200/300",
    /* 0x8 */	"K6-2, 233/600",
    /* 0x9 */	"K6-III, 350/600",
    /* 0xA */	none,
    /* 0xB */	none,
    /* 0xC */	none,
    /* 0xD */	none,
    /* 0xE */	none,
    /* 0xF */	none
  };

/* AMD model names for the "6" family table */
static const char * const AMD_6_Model[16] =
  {
    /* 0x0 */	none,
    /* 0x1 */	"Athlon (model 1)",	/* 0.25 micron */
    /* 0x2 */	"Athlon (model 2)",	/* 0.18 micron */
    /* 0x3 */	none,
    /* 0x4 */	"Athlon (model 4)",	/* 0.18 micron, internal L2 cache */
    /* 0x5 */	none,
    /* 0x6 */	none,
    /* 0x7 */	none,
    /* 0x8 */	none,
    /* 0x9 */	none,
    /* 0xA */	none,
    /* 0xB */	none,
    /* 0xC */	none,
    /* 0xD */	none,
    /* 0xE */	none,
    /* 0xF */	none
  };

/* Brand names, indexed by brand code.
 * The 8-bit brand field was introduced with the Pentium III model 8.
 * It is not guaranteed that brand codes will be assigned consecutively.
 * If more fields need to be entered, the data structure should be changed.
 */

static const char * const Intel_Brand[] =
  {
    /* 0x0 */	none,
    /* 0x1 */	"Celeron",
    /* 0x2 */	"Pentium III",
    /* 0x3 */	"Pentium III Xeon",
    /* 0x4 */	"Pentium III",
    /* 0x5 - 0x7 */
		none, none, none,
    /* 0x8 */	"Pentium 4",
    /* 0x9 - 0xD */
		none, none, none, none, none,
    /* 0xE */	"Xeon",
    /* 0xF */	none
  };

/* number of entries in Intel_Brand[] */
static const int size_Intel_Brand = sizeof(Intel_Brand) / sizeof(*Intel_Brand);


/* Intel standard feature names.  Entry i describes feature-flag bit i, as
 * annotated on each line (bit number, then Intel's mnemonic if any).
 */
static const char * const Intel_Feature[32] =
  {
    "Floating Point Unit",		/*  0 FPU: on chip, 387 instructions */
    "Virtual Mode Extension",		/*  1 VME: virtual-8086 */
    "Debugging Extension",		/*  2 DE */
    "Page Size Extension",		/*  3 PSE: 4 MB pages */
    "Time Stamp Counter",		/*  4 TSC */
    "Model Specific Registers",		/*  5 MSR */
    "Physical Address Extension",	/*  6 PAE */
    "Machine Check Exception",		/*  7 MCE */
    "CMPXCHG8B Instruction",		/*  8 CX8 */
    "Local APIC Hardware Enabled",	/*  9 APIC: on chip */
    "(10, reserved, used with MTRR)",	/* 10 */
    "Fast System Call",			/* 11 SEP: if signature >= 0x633 */
    "Memory Type Range Registers",	/* 12 MTRR */
    "Page Global Enable",		/* 13 PGE */
    "Machine Check Architecture",	/* 14 MCA */
    "Conditional Move Instructions",	/* 15 CMOV */
    "Page Attribute Table",		/* 16 PAT */
    "36-bit Page Size Extension",	/* 17 PSE-36 */
    "Processor Serial Number Enabled",	/* 18 PSN */
    "Cache Line Flush Instruction",	/* 19 CLFSH */
    "(20, reserved)",			/* 20 */
    "Debug Store",			/* 21 DS: branch history */
    "Thermal Monitor and Power Management",
		/* 22 ACPI: includes clock control by software through MSR's */
    "MMX Technology",			/* 23 MMX */
    "Fast FPU Save/Restore Instructions",	/* 24 FXSR */
    "Streaming SIMD Extensions, level 1",	/* 25 SSE */
    "Streaming SIMD Extensions, level 2",	/* 26 SSE2 */
		/* SSE requires further OS support for the register set */
    "Self-Snoop",			/* 27 SS */
    "(28, reserved)",			/* 28 */
    "Thermal Monitor",			/* 29 TM */
    "(30, reserved)",			/* 30 */
    "(31, reserved)"			/* 31 */
  };

/* AMD feature names, covering both the standard and the extended feature
 * words.  Entry i describes bit i; lines marked "extended" apply to the
 * extended feature word only.
 */
static const char * const AMD_Feature[32] =
  {
    "Floating Point Unit",		/*  0: on chip, 387 instructions */
    "Virtual Mode Extensions",		/*  1 */
    "Debugging Extensions",		/*  2 */
    "Page Size Extensions",		/*  3: 4 MB pages */
    "Time Stamp Counter",		/*  4 */
    "Model Specific Registers",		/*  5 */
    "Page Address Extensions",		/*  6 */
    "Machine Check Exception",		/*  7 */
    "CMPXCHG8B Instruction",		/*  8 */
    "Local APIC Hardware Enabled",	/*  9: on chip, Athlon model 2,4 */
		/* (bit 9 is Global Paging Extension on AMD-K5 model 0) */
    "(10, reserved)",			/* 10 */
    "Sysenter/Sysexit Instructions",	/* 11: standard features */
		/* extended features: "Syscall/Sysret Instructions" */
    "Memory Type Range Registers",	/* 12 */
    "Global Paging Extension",		/* 13 */
    "Machine Check Architecture",	/* 14 */
    "Conditional Move Instructions",	/* 15 */
    "Page Attribute Table",		/* 16 */
    "36-bit Page Size Extension",	/* 17 */
    "(18, reserved)",			/* 18 */
    "(19, reserved)",			/* 19 */
    "(20, reserved)",			/* 20 */
    "(21, reserved)",			/* 21 */
    "AMD MMX Extensions",		/* 22: extended features */
    "MMX Instructions",			/* 23 */
    "Fast FPU Save/Restore Instructions",	/* 24 */
    "(25, reserved)",			/* 25 */
    "(26, reserved)",			/* 26 */
    "(27, reserved)",			/* 27 */
    "(28, reserved)",			/* 28 */
    "(29, reserved)",			/* 29 */
    "AMD 3DNow! Extensions",		/* 30: extended features */
    "3DNow! Instructions"		/* 31: extended features */
  };

/* Cyrix uses the AMD standard features */

/* Cyrix extended feature names.  Entry i describes bit i of the extended
 * feature word.
 */
static const char * const Cyrix_Feature[32] =
  {
    "Floating Point Unit",		/*  0: on chip, 387 instructions */
    "Virtual Mode Extensions",		/*  1 */
    "Debugging Extensions",		/*  2 */
    "Page Size Extensions",		/*  3: 4 MB pages */
    "Time Stamp Counter",		/*  4 */
    "Model Specific Registers",		/*  5 */
    "(6, reserved)",			/*  6 */
    "Machine Check Exception",		/*  7 */
    "CMPXCHG8B Instruction",		/*  8 */
    "(9, reserved)",			/*  9 */
    "(10, reserved)",			/* 10 */
    "Syscall/Sysret Instructions",	/* 11 */
    "(12, reserved)",			/* 12 */
    "Global Paging Extension",		/* 13 */
    "(14, reserved)",			/* 14 */
    "Integer Conditional Move Instructions",	/* 15 */
    "Fl.pt.  Conditional Move Instructions",	/* 16 */
    "(17, reserved)",			/* 17 */
    "(18, reserved)",			/* 18 */
    "(19, reserved)",			/* 19 */
    "(20, reserved)",			/* 20 */
    "(21, reserved)",			/* 21 */
    "(22, reserved)",			/* 22 */
    "MMX Instructions",			/* 23 */
    "Cyrix Multimedia Extensions",	/* 24 */
    "(25, reserved)",			/* 25 */
    "(26, reserved)",			/* 26 */
    "(27, reserved)",			/* 27 */
    "(28, reserved)",			/* 28 */
    "(29, reserved)",			/* 29 */
    "(30, reserved)",			/* 30 */
    "3DNow! Instructions"		/* 31 */
  };


/* The Intel_Cache[] array MUST be sorted by the code.
 * If not stated otherwise, the information is from the AP-485 document, which
 * we take as authoritative, even if it has not been consistent.
 * [1] "Intel Pentium 4 Processor Identification and the CPUID Instruction",
 *	July 2000.
 */

/* Table of Intel cache/TLB descriptor codes.
 * Each entry pairs a descriptor code with the cache sizes that code implies
 * (0 for a field the code says nothing about) and a printable description.
 * Entries are listed in increasing order of `code`; keep them that way when
 * adding new ones.  Several entries are selected at compile time by
 * PMC_P5 / PMC_MMX / PMC_P6 / PMC_P15.
 */
static
  struct
    {
      const pmc_uint32_t code;		/* descriptor code */
      const int L1_i_size, L1_d_size, L2_size, L3_size;	/* KB */
      const char * const descriptor;	/* human-readable text for this code */
    }
  Intel_Cache[] =
  {
    { 0x01, 0, 0, 0, 0,	"L1 instr TLB, 4K page, 4-way, 32 entries" }
    /* code 0x02: text differs for PMC_P5 builds */
#ifdef PMC_P5
  , { 0x02, 0, 0, 0, 0,	"L1 instr TLB, 4M page, uses same TLB as 4K page" }
#else
  , { 0x02, 0, 0, 0, 0,	"L1 instr TLB, 4M page, fully associative, 2 entries" }
#endif
  , { 0x03, 0, 0, 0, 0,	"L1 data  TLB, 4K page, 4-way, 64 entries" }
    /* code 0x04: three variants -- PMC_P5 without PMC_MMX, PMC_P5 with
     * PMC_MMX, and everything else */
#ifdef PMC_P5
#ifndef PMC_MMX
  , { 0x04, 0, 0, 0, 0,	"L1 data  TLB, 4M page, 2-way,  2 entries" }
#else
  , { 0x04, 0, 0, 0, 0,	"L1 data  TLB, 4M page, uses same TLB as 4K page" }
#endif
#else
  , { 0x04, 0, 0, 0, 0,	"L1 data  TLB, 4M page, 4-way,  8 entries" }
#endif
  , { 0x06, 8, 0, 0, 0,	"L1 instr cache, 8 KB, 4-way, 32 byte line" }
  , { 0x08, 16, 0, 0, 0,	"L1 instr cache, 16 KB, 4-way, 32 byte line" }
  , { 0x0a, 0, 8, 0, 0,	"L1 data  cache, 8 KB, 2-way, 32 byte line" }
  , { 0x0c, 0, 16, 0, 0,	"L1 data  cache, 16 KB, 4-way, 32 byte line" }
  /* [1] */ , { 0x22, 0, 0, 0, 512,	"L3 cache, 512 KB, 4-way, sectored, 64 byte line" }
  /* [1] */ , { 0x23, 0, 0, 0, 1024,	"L3 cache, 1 MB, 8-way, sectored, 64 byte line" }
  /* [1] */ , { 0x25, 0, 0, 0, 2048,	"L3 cache, 2 MB, 8-way, sectored, 64 byte line" }
  /* [1] */ , { 0x29, 0, 0, 0, 4096,	"L3 cache, 4 MB, 8-way, sectored, 64 byte line" }
    /* code 0x40 means "no L2" under PMC_P6 and "no L3" under PMC_P15.
     * NOTE(review): if both macros are defined the table gets two 0x40
     * entries; if neither is defined it gets none -- presumably the build
     * defines exactly one processor family; confirm in pmc_options.h. */
#ifdef PMC_P6
  , { 0x40, 0, 0, 0, 0,	"L2 cache not installed" }
#endif
#ifdef PMC_P15
  , { 0x40, 0, 0, 0, 0,	"L3 cache not installed" }
#endif
  , { 0x41, 0, 0, 128, 0,	"L2 cache, 128 KB, 4-way, 32 byte line" }
  , { 0x42, 0, 0, 256, 0,	"L2 cache, 256 KB, 4-way, 32 byte line" }
  , { 0x43, 0, 0, 512, 0,	"L2 cache, 512 KB, 4-way, 32 byte line" }
  , { 0x44, 0, 0, 1024, 0,	"L2 cache, 1 MB, 4-way, 32 byte line" }
  , { 0x45, 0, 0, 2048, 0,	"L2 cache, 2 MB, 4-way, 32 byte line" }
  , { 0x50, 0, 0, 0, 0,		"L1 instr TLB; 4K, 2M or 4M pages, fully associative, 64 entries" }
  , { 0x51, 0, 0, 0, 0,		"L1 instr TLB; 4K, 2M or 4M pages, fully associative, 128 entries" }
  , { 0x52, 0, 0, 0, 0,		"L1 instr TLB; 4K, 2M or 4M pages, fully associative, 256 entries" }
  , { 0x5b, 0, 0, 0, 0,		"L1 data  TLB; 4K or 4M pages, fully associative, 64 entries" }
  , { 0x5c, 0, 0, 0, 0,		"L1 data  TLB; 4K or 4M pages, fully associative, 128 entries" }
  , { 0x5d, 0, 0, 0, 0,		"L1 data  TLB; 4K or 4M pages, fully associative, 256 entries" }
  , { 0x66, 0, 8, 0, 0,		"L1 data cache, 8 KB, 4-way, sectored, 64 byte line" }
  , { 0x67, 0, 16, 0, 0,	"L1 data cache, 16 KB, 4-way, sectored, 64 byte line" }
  , { 0x68, 0, 32, 0, 0,	"L1 data cache, 32 KB, 4-way, sectored, 64 byte line" }
    /* trace cache entries: the L1_i_size field holds the "K micro-ops"
     * figure from the description, not a size in KB */
  , { 0x70, 12, 0, 0, 0,	"Instruction Trace Cache, 12K micro-ops, 8-way" }
  , { 0x71, 16, 0, 0, 0,	"Instruction Trace Cache, 16K micro-ops, 8-way" }
  , { 0x72, 32, 0, 0, 0,	"Instruction Trace Cache, 32K micro-ops, 8-way" }
  , { 0x79, 0, 0, 128, 0,	"L2 cache, 128 KB, 8-way, sectored, 64 byte line" }
  , { 0x7a, 0, 0, 256, 0,	"L2 cache, 256 KB, 8-way, sectored, 64 byte line" }
  , { 0x7b, 0, 0, 512, 0,	"L2 cache, 512 KB, 8-way, sectored, 64 byte line" }
  , { 0x7c, 0, 0, 1024, 0,	"L2 cache, 1 MB, 8-way, sectored, 64 byte line" }
  , { 0x81, 0, 0, 128, 0,	"L2 cache, 128 KB, 8-way, 32 byte line" }
  , { 0x82, 0, 0, 256, 0,	"L2 cache, 256 KB, 8-way, 32 byte line" }
  , { 0x83, 0, 0, 512, 0,	"L2 cache, 512 KB, 8-way, 32 byte line" }
  , { 0x84, 0, 0, 1024, 0,	"L2 cache, 1 MB, 8-way, 32 byte line" }
  , { 0x85, 0, 0, 2048, 0,	"L2 cache, 2 MB, 8-way, 32 byte line" }
  };

/* number of entries in Intel_Cache[] */
static const int size_Intel_Cache =
	(sizeof(Intel_Cache)/sizeof(Intel_Cache[0]));

/* Disabled: Cyrix descriptor entries in the same layout as Intel_Cache[].
 * They are never compiled; note that code 0x70 here has a different meaning
 * from the Intel 0x70 entry above, so they could not simply be merged into
 * that table.
 */
#if 0
  /* Cyrix information from sandpile.org */
  , { 0x70, 0, 0, 0, 0,	"L1 instr/data TLB, 4K page, 4-way, 32 entries" }
  , { 0x80, 8, 8, 0, 0,	"L1 instr/data cache, 16 KB, 4-way, 16 byte line" }
#endif

/*----------------------------------------------------------------------------*/

/* read and write the EFLAGS register */

/* Return the current contents of the EFLAGS register.
 * The register is pushed on the stack and immediately popped into a
 * compiler-chosen general register ("=r"), so the stack is left balanced.
 * NOTE(review): uses the pushfl/popl forms -- presumably an i386-only
 * build; confirm before compiling for another target.
 */
static __inline__ pmc_uint32_t read_eflags(void)
  {
    pmc_uint32_t eflags;
    __asm__ __volatile__ (
      "pushfl\n\t"	/* push from the eflags register */
      "popl %0"		/* pop to general register */
      : "=r" (eflags)	/* output */
      );
    return eflags;
  }

/* Load the EFLAGS register from `eflags`.
 * The value is pushed on the stack from a general register and popped into
 * EFLAGS.  Callers in this file read the register back afterwards to find
 * out which bits actually changed, so do not assume every bit takes effect.
 */
static __inline__ void write_eflags(pmc_uint32_t eflags)
  {
    __asm__ __volatile__ (
      "pushl %0\n\t"	/* push from general register */
      "popfl"		/* pop to the eflags register */
      :			/* output */
      : "r" (eflags)	/* input */
      );
  }

/*----------------------------------------------------------------------------*/

/* cpuid instruction */

/* Execute the cpuid instruction with input value n.
 *   n          - requested cpuid level, loaded into EAX (the "0" constraint
 *                ties it to the first output operand)
 *   a, b, c, d - receive the resulting EAX, EBX, ECX and EDX values
 * All four pointers are written unconditionally, so none may be NULL.
 * The caller must already know that cpuid is available and that n is a
 * level it wants to query; nothing is checked here.
 */
static __inline__ void read_cpuid(int n,
  pmc_uint32_t *a, pmc_uint32_t *b, pmc_uint32_t *c, pmc_uint32_t *d)
  {
    __asm__ __volatile__ (
      "cpuid"
      : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)	/* output */
      : "0" (n)		/* input */
      );
  }

/*----------------------------------------------------------------------------*/

/* set by pmc_verify_processor() */

static int L1_i_size = 0;	/* L1 instruction cache, Kilobytes */
static int L1_d_size = 0;	/* L1 data cache, Kilobytes */
static int L2_size   = 0;	/* L2 cache, Kilobytes */
static int L3_size   = 0;	/* L3 cache, Kilobytes */

/* private interface */

/* Report one of the cache sizes recorded in the file-scope variables above.
 *   request = 0 : L1_i_size
 *   request = 1 : L1_d_size
 *   request = 2 : L2_size
 *   request = 3 : L3_size
 * Any other request yields 0.
 */
int pmc_cache_size(int request)
{
  if (request == 0) { return L1_i_size; }
  if (request == 1) { return L1_d_size; }
  if (request == 2) { return L2_size; }
  if (request == 3) { return L3_size; }

  return 0;
}

/*----------------------------------------------------------------------------*/

/* formats for AMD cache description */
static const char amd_full[]    =	/* associativity: fully associative */
	"fully associative";
static const char amd_nway[]    =	/* associativity: one long argument */
	"%ld-way";
static const char amd_entries[] =	/* one long argument, ends the line */
	", %ld entries\n";
static const char amd_lines[]   =	/* two long arguments, ends the line */
	", %ld lines per tag, %ld byte line\n";

/*----------------------------------------------------------------------------*/

/* return TRUE if this is an Intel-compatible processor with all the right
 *   features for the performance-monitoring counters library
 * return FALSE otherwise
 *
 * if outfile is not NULL, information about the processor is printed to
 *   outfile.
 */

/* private interface */

/* Fprintf(outfile, format, ...) prints only when outfile is not NULL.
 * A variable named `outfile` must be in scope wherever the macro is used.
 *
 * The test is written inverted, with the fprintf call hanging off the
 * `else`, so that the hidden `if` is already complete.  With the plain
 * `if (outfile != NULL) fprintf` form, an unbraced
 *     if (cond) Fprintf(...); else ...
 * would attach the `else` to the hidden `if` instead of to `if (cond)`.
 */
#define Fprintf if (outfile == NULL) { } else fprintf

int pmc_verify_processor(FILE * outfile, const char * const prog, int devpmc)
{
  pmc_uint32_t level = 0, signature = 0, features = 0, brand = 0, dummy;
  pmc_uint32_t extended_level = 0, extended_signature = 0, extended_features = 0;
  pmc_uint32_t type, family, model, stepping;
  pmc_uint32_t extended_family, extended_model;
  pmc_uint32_t brand_id, chunk_count, cpu_count, apic_id;
  pmc_uint32_t e0, e1, e2;	/* to manipulate eflags register */
  int i, j, k, ret = TRUE;
  int intel = FALSE, amd = FALSE, cyrix = FALSE, transmeta = FALSE;
  int umc = FALSE, nexgen = FALSE, centaur = FALSE, rise = FALSE;
  char vendor_id[] = "123456789012";	/* must be exactly 12 characters */

  PMC_VERBOSE_IN(pmc_verify_processor)

  L1_i_size = 0; L1_d_size = 0; L2_size = 0; L3_size = 0;	/* Kilobytes */

  if (pmc_verify_datatypes(outfile, prog) == FALSE)
    { return FALSE; }

  /* The cpuid instruction is not implemented on Intel 386 and older Intel 486
   * processors, on AMD processors before the Am486DX4, or on Cyrix processors
   * before the 6x86.
   *
   * If this was C++ we could try/catch the cpuid instruction, but it's not.
   */

#ifdef PMC_CHECK_FOR_OLD_PROCESSORS
  /* The code for 8086/8088/80286 should use 16-bit operands, while everything
   * since the 386 can use 32-bit operands.
   *
   * If this code fails to compile or execute, get yourself a new computer.
   */

  /* Check for 8086/8088 - bits 12-15 of flags register are always 1.
   * These bits are (12,13) IOPL (I/O privilege level), (14) NT, (15) constant 0.
   * Changing them at user-level like this is neither legal nor wise.
   */

  e0 = read_eflags();
  e1 = e0 & 0xffff0fff;
  write_eflags(e1);	/* try to clear bits 12-15 */
  e2 = read_eflags();
  write_eflags(e0);	/* put back the original */

  if ((e2 & 0xf000) == 0xf000)	/* are bits 12-15 still set? */
    {
      Fprintf(outfile,
	  "%s: could not clear bits 12-15 of flags register"
	  " (is this an 8086/8088?)\n", prog);
      return FALSE;
    }

  /* Check for 80286 - bits 12-15 of flags register are always 0 in real mode.
   * See above.
   */

  e0 = read_eflags();
  e1 = e0 | 0xf000;
  write_eflags(e1);	/* try to set bits 12-15 */
  e2 = read_eflags();
  write_eflags(e0);	/* put back the original */

  if ((e2 & 0xf000) == 0)	/* are bits 12-15 still clear? */
    {
      Fprintf(outfile,
	  "%s: could not set bits 12-15 of flags register"
	  " (is this an 80286?)\n", prog);
      return FALSE;
    }

  /* Check for 386 - can the alignment check flag be changed?
   * There was no such flag on the 386, it was new on the 486.
   * We assume the stack pointer is 4-byte aligned, so an alignment fault will
   * not actually occur here.  If you get a fault, get a new compiler.
   */

  e0 = read_eflags();
  e1 = e0 ^ 0x40000;	/* toggle bit 18, the AC bit */
  write_eflags(e1);
  e2 = read_eflags();
  write_eflags(e0);	/* put back the original */

  if (e0 == e2)
    {
      Fprintf(outfile,
	  "%s: could not toggle AC bit of eflags register"
	  " (is this a 386?)\n", prog);
      return FALSE;
    }
#endif	/* PMC_CHECK_FOR_OLD_PROCESSORS */

  /* Check for cpuid instruction - can the ID flag be changed?
   * This is not a definitive test for a 486; the cpuid instruction
   * was added in stepping 3.
   * Cyrix or NexGen, may need to enable cpuid instruction.
   * The NexGen Nx586 has the cpuid instruction but not the ID flag.
   */

  e0 = read_eflags();
  e1 = e0 ^ 0x200000;	/* toggle bit 21, the ID bit */
  write_eflags(e1);
  e2 = read_eflags();
  write_eflags(e0);	/* put back the original */

  if (e0 == e2)
    {
      Fprintf(outfile,
	  "%s: could not toggle ID bit of eflags register"
	  " (no cpuid instruction?)\n", prog);
      return FALSE;
    }

  /* If there is no cpuid instruction, the Cyrix 5/2 test and their Device
   * Identification Registers can be used to further identify the processor.
   * This code is derived from the Cyrix Application Note 112.
   * If you ever need an explanation of why software is expensive, this is it.
   */

#if 0
  /* Test for Cyrix CPU if there is no cpuid instruction */
  /* untested, and will remain so */
  if (something)
    {
      pmc_uint32_t eax;
      __asm__ __volatile__ (
	"xor %%eax,%%eax\n\t"	/* clear %eax */
	"sahf\n\t"		/* flags = %ah, but bit 1 is always 1 in flags */
	"movl $5,%%eax\n\t"	/* dividend */
	"movl $2,%%ebx\n\t"	/* divisor */
	"div %%bl\n\t"		/* do an operation that does not change flags */
	"lahf"			/* %ah = flags */
	: "=a" (eax)	/* output */
	:		/* input */
	: "b" 	/* clobbered */
      );
      if (eax == 2)
	{
	  cyrix = true;
	  /* check the Device Identification Registers
	   * ioperm() cannot be executed in user mode, so put this in /dev/pmc?
	   * see /usr/include/asm/io.h
	   */
	  unsigned char dir0, dir1;
	  ioperm(...);		/* set the port access permission bits */
	  outb(0xfe,0x22);	/* write 0xfe to i/o port 0x22 */
	  dir0 = inb(0x23);	/* read DIR0 (8 bits) from i/o port 0x23 */
	  outb(0xff,0x22);	/* write 0xff to i/o port 0x22 */
	  dir1 = inb(0x23);	/* read DIR1 (8 bits) from i/o port 0x23 */
	  /* interpret the bits using Cyrix tables (this is the hard part) */
	}
    }
#endif

  /* Some additional notes on the EFLAGS register.
   * The value of the ID bit is not relevant, only that it can or cannot be
   *   changed.  If a trap or interrupt occurs during the test, the value of
   *   EFLAGS could be reset by the interrupt handler, and the test would fail.
   * To see if alignment checking of memory references is enabled, inspect
   *   bit 18, the Alignment Check flag.  If 0, this feature is disabled.
   *   If 1, and the Alignment Mask flag (bit 18 of CR0) is also 1, then
   *   the processor will generate an Alignment Check exception for a
   *   misaligned memory reference at CPL = 3.
   * The original Pentium (P5) had a flaw whereby the AC flag may be
   *   inadvertently cleared after a fault in the fnsave instruction.
   */

#if defined(PMC_CHECK_ALIGNMENT) && (PMC_CHECK_ALIGNMENT > 1)
  /* The following code is incomplete, as it does not restore the AC or AM
   *   flags to their previous state when the program is finished.
   */

  /* set the Alignment Check flag */
  e0 = read_eflags();
  e1 = e0 | 0x40000;	/* set bit 18, the AC bit */
  write_eflags(e1);

  /* set the Alignment Mask flag */
  if (devpmc != -1)
  {
    lseek(devpmc, PMC_ENABLE_ALIGNMENT_CHECKING, SEEK_SET);
	/* ignore the return value */
  }
#endif	/* PMC_CHECK_ALIGNMENT */

  read_cpuid(0, &level,
    (pmc_uint32_t *)&vendor_id[0],
    (pmc_uint32_t *)&vendor_id[8],
    (pmc_uint32_t *)&vendor_id[4]);

  /* level is the largest input value accepted by the cpuid instruction
   * on this processor.  The following code assumes level is 0, 1, 2 or 3
   * except on early models of the processor.  Extended levels are provided
   * by AMD and Cyrix, and by the Pentium 4.
   */

  Fprintf(outfile, "processor information:\n  vendor id = '%s'\n", vendor_id);

  if (strcmp(vendor_id, "GenuineIntel") == 0) { intel     = TRUE; }
  else
  if (strcmp(vendor_id, "AuthenticAMD") == 0) { amd       = TRUE; }
  else
  if (strcmp(vendor_id, "CyrixInstead") == 0) { cyrix     = TRUE; }
  else
  if (strcmp(vendor_id, "GenuineTMx86") == 0) { transmeta = TRUE; }
  else
  if (strcmp(vendor_id, "UMC UMC UMC ") == 0) { umc       = TRUE; }
  else
  if (strcmp(vendor_id, "NexGenDriven") == 0) { nexgen    = TRUE; }
  else
  if (strcmp(vendor_id, "CentaurHauls") == 0) { centaur   = TRUE; }
  else
  if (strcmp(vendor_id, "RiseRiseRise") == 0) { rise      = TRUE; }
  else
  if (level >= 0x00000500)
    {
      /* sandpile.org: level = 0x000005??, no vendor string */
      Fprintf(outfile, "  A-step Pentium processor? (EAX = 0x%lx)\n", level);
      return FALSE;
    }
  else
    {
      Fprintf(outfile, "  vendor id not recognized\n");
      return FALSE;
    }

  if (level == 0)
    {
      Fprintf(outfile, "  cannot obtain required information\n");
      return FALSE;
    }


  /* now the standard level is at least 1 */

  read_cpuid(1, &signature, &brand, &dummy, &features);

  /*
   * signature is also the upper 32 bits of the 96-bit processor
   *   serial number obtained with input 3 on the Pentium III.
   * brand is not used by the AMD or Cyrix processors.
   */

  /* Note that the feature list is derived from the hardware, not from
   * the Linux kernel, which is capable of disabling or enabling some
   * features in software.  To see the software state, look at
   * boot_cpu_data.x86_capability (in arch/i386/kernel/setup.c) or
   * /proc/cpuinfo.
   */
							/* first Intel use */
  extended_family = (signature & 0x0ff00000) >> 20;	/* Pentium 4 */
  extended_model  = (signature & 0x000f0000) >> 16;	/* Pentium 4 */
  type            = (signature & 0x00003000) >> 12;
  family          = (signature & 0x00000f00) >> 8;
  model           = (signature & 0x000000f0) >> 4;
  stepping        = (signature & 0x0000000f);

  apic_id     = (brand & 0xff000000) >> 24;	/* Pentium 4 */
  cpu_count   = (brand & 0x00ff0000) >> 16;	/* Pentium 4 (per sandpile.org) */
  chunk_count = (brand & 0x0000ff00) >> 8;	/* Pentium 4 */
  brand_id    = (brand & 0x000000ff);		/* Pentium III model 8 */

  if (intel)
    {
      switch (family)
	{
	  /* case 3:
	   *   This case won't work, because the 386 processor signature
	   *   is only available at reset, and it has a different format.
	   *   See the Intel document AP-485 for more information.
	   */

	  /* case 4:
	   *	model	processor
	   *	0	486DX; 25,33				(1)
	   *	1	486DX; 50				(1)
	   *	2	486SX; no FPU; 16,20,25,33		(1)
	   *	3	487 or DX2 or DX2 OverDrive; 50,66	(1)
	   *	4	486SL; 20,25,33				(2)
	   *	5	SX2					(1)
	   *	7	Writeback-enhanced DX2			(2)
	   *	8	DX4 or DX4 OverDrive; 75,100		(2)
	   *	9	Writeback-enhanced DX4
	   *        (1) cpuid not implemented
	   *        (2) cpuid implemented since stepping 3
	   */

	  case 4:	/* 486 */
	    if (outfile != NULL)
	      {
		fprintf(outfile,
		  "  %s processor\n"
		  "  %s family, %s, stepping %lu\n",
		  Intel_Type[type],
		  Intel_Family[family], Intel_4_Model[model], stepping);
	      }
	    break;

	  case 5:	/* Pentium, Pentium/MMX */
	    if (outfile != NULL)
	      {
		fprintf(outfile,
		  "  %s processor\n"
		  "  %s family, %s, stepping %lu\n",
		  Intel_Type[type],
		  Intel_Family[family], Intel_5_Model[model], stepping);
		switch ( (signature & 0x3ff0) >> 4 )
		  {
		    case 0x050:	/* P5 */
		      fprintf(outfile,
			"  Pentium (60 or 66 MHz, A-step)\n");
		      break;
		    case 0x051:	/* P5 */
		      if (stepping <= 7)
		        { fprintf(outfile,
			    "  Pentium (60 or 66 MHz)\n");
			}
		      else
		        { fprintf(outfile,
			    "  Pentium OverDrive (120 or 133 MHz)\n");
			  /* mislabeled P54 */
			}
		      break;
		    case 0x151:	/* P54 */
		      fprintf(outfile,
			"  Pentium OverDrive (120 or 133 MHz)\n");
		      break;
		    case 0x052:	/* P54C */
		    case 0x252:
		    case 0x057:	/* mobile, 75,90,100 */
		      fprintf(outfile,
			"  Pentium (75-200 MHz) (or OverDrive)\n");
				/* 75,90,100,120,133,150,166,200 */
			/* signature 0x052c is either original or overdrive,
			 * cannot distinguish by cpuid alone; look at the
			 * label under the fan :-)
			 */
		      break;
		    case 0x152:	/* P54 */
		      fprintf(outfile,
			"  Pentium OverDrive (125-166 MHz)\n");
				/* 125,150,166 */
		      break;
		    case 0x153:	/* P24T */
		      fprintf(outfile,
			"  Pentium OverDrive (486 replacement, 63,83 MHz)\n");
		      break;
		    case 0x054:	/* P55C */
		    case 0x254:
		    case 0x058:	/* mobile, 166,200,233,266 */
		      fprintf(outfile,
			"  Pentium/MMX (120-300 MHz)\n");
				/* 120,133,150,166,200,233,266,300 */
		      break;
		    case 0x154:	/* P55 */
		      fprintf(outfile,
			"  Pentium/MMX OverDrive"
			" (Pentium 75-100 MHz replacement, 125-200 MHz)\n");
				/* 125,150,166,180,200 */
		      break;
		    case 0x155:
		      fprintf(outfile,
			"  Pentium OverDrive (486/DX4 replacement)\n");
		      break;
		    case 0x156:
		      fprintf(outfile,
			"  Pentium OverDrive\n");
		      break;
		    default:
		      fprintf(outfile,
			"  processor signature 0x%lx not recognized\n",
			signature);
		      break;
		  }
		fprintf(outfile,
		  "    (the above information may not be definitive)\n");
	      }
	    break;

	  case 6:	/* Pentium Pro/II/III */
	    if (outfile != NULL)
	      {
		fprintf(outfile,
		  "  %s processor\n"
		  "  %s family, %s, stepping %lu\n",
		  Intel_Type[type],
		  Intel_Family[family], Intel_6_Model[model], stepping);
		switch ( (signature & 0x3ff0) >> 4 )
		  {
		    case 0x060:
		      fprintf(outfile,
			"  Pentium Pro (A-step)\n");
		      break;
		    case 0x061:
		      fprintf(outfile,
			"  Pentium Pro\n");
				/* 150,166,180,200 */
		      break;
		    case 0x063:
		      fprintf(outfile,
			"  Pentium II (model 3)\n");
				/* 0.28 micron */
		      break;
		    case 0x163:
		      fprintf(outfile,
			"  Pentium II OverDrive (Pentium Pro replacement)\n");
		      break;
		    case 0x164:
		      fprintf(outfile,
			"  (reported, but unknown from Intel documents)\n");
		      break;
		    case 0x065:
		      fprintf(outfile,
			"  Celeron, Pentium II or Pentium II Xeon (model 5)\n");
		      /* these are incompletely distinguished by the L2 cache size:
		       *   0, Celeron
		       *   512 KB, Pentium II or Pentium II Xeon
		       *   1 or 2 MB, Pentium II Xeon
		       */
		      break;
		      		/* 0.25 micron */
		    case 0x066:
		      fprintf(outfile,
			"  Celeron (model 6)\n");
		      break;
		      		/* on-die L2 cache */
		    case 0x067:
		      fprintf(outfile,
			"  Celeron, Pentium III or Pentium III Xeon (model 7)\n");
		      /* these are incompletely distinguished by the L2 cache size:
		       *   ? KB, Celeron
		       *   512 KB, Pentium III or Pentium III Xeon
		       *   1 or 2 MB, Pentium III Xeon
		       */
		      break;
		      		/* 0.25 micron */
		    case 0x068:
		      fprintf(outfile,
			"  Celeron, Pentium III or Pentium III Xeon (model 8)\n");
		      break;
		      		/* 0.18 micron, 256 KB on-die L2 cache */
		    case 0x06a:
		      fprintf(outfile,
			"  Pentium III or Pentium III Xeon (model A)\n");
		      break;
		      		/* 0.18 micron, 1 or 2 MB on-die L2 cache */
		    default:
		      fprintf(outfile,
			"  processor signature 0x%lx not recognized\n"
			, signature);
		      break;
		  }
	      }
	    break;

	  case 15:	/* Pentium 4 */
	    if (outfile != NULL)
	      {
		fprintf(outfile,
		  "  %s processor\n"
		  "  %s family, %s, stepping %lu\n",
		  Intel_Type[type],
		  Intel_Extended_Family[extended_family],
		  (model < 15) ?  Intel_15_Model[model]
				: Intel_15_Extended_Model[extended_model],
		  stepping);
		switch ( (signature & 0x3ff0) >> 4 )
		  {
		    case 0x0f0:
		      fprintf(outfile,
			"  Pentium 4\n");
		      break;
		    default:
		      fprintf(outfile,
			"  processor signature 0x%lx not recognized\n"
			, signature);
		      break;
		  }
		if (features & 0x80000) {	/* bit 19 */
		  fprintf(outfile, "    CLFLUSH 8-byte chunk count = %lu\n", chunk_count);
		  /* cache line size = 8*chunk_count */
		}
		fprintf(outfile, "    logical processor count = %lu\n", cpu_count);
		fprintf(outfile, "    local APIC physical ID = %lu\n", apic_id);
	      }
	    break;

	  default:
	    if (outfile != NULL)
	      {
		fprintf(outfile,
		  "   processor signature = 0x%lx\n", signature);
		fprintf(outfile,
		  "   type = 0x%lx, family = 0x%lx,"
		  " model = 0x%lx, stepping = 0x%lx\n",
		  type, family, model, stepping);
		fprintf(outfile,
		  "  Time Stamp Counter not implemented on this processor\n");
	      }
	    return FALSE;
	}  /* switch(family) */

      if ((0 < brand_id) && (brand_id < (pmc_uint32_t)size_Intel_Brand))
	{
	  Fprintf(outfile, "    brand = %s\n", Intel_Brand[brand_id]);
	}
    }	/* intel */

  if ((intel && (family == 15)) || amd || cyrix)
    {
      /* extended levels */
      read_cpuid(0x80000000, &extended_level, &dummy, &dummy, &dummy);

      if (extended_level >= 0x80000004)
	{
	  pmc_uint32_t v[13]; /* name string */
	  read_cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
	  read_cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
	  read_cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
	  v[12] = 0; /* terminate the string */

	  Fprintf(outfile, "  %s\n", (char *) v);
	}

      if (extended_level >= 0x80000001)
	{
	  read_cpuid(0x80000001, &extended_signature, &dummy, &dummy, &extended_features);

	  /* The extended signature structure is the same as the standard
	   * signature structure, but the values can be different.
	   */
          Fprintf(outfile, "    standard signature, features 0x%08lx 0x%08lx\n",
	    signature, features);
	  Fprintf(outfile, "    extended signature, features 0x%08lx 0x%08lx\n",
	    extended_signature, extended_features);
	}
    }	/* (intel && (family == 15)) || amd || cyrix */

  if (amd)
    {
      /* from AMD and sandpile.org */
      /* family = 4: model = 3: Am486DX2     (no cpuid)
       *                     7: Am486DX2WB   (no cpuid)
       *                     8: Am486DX4
       *                     9: Am486DX4WB
       *                     E: Am5x86
       *                     F: Am5x86WB
       *          5: model = 0: AMD-K5, PR 75, 90, 100
       *                     1: AMD-K5, PR 120, 133
       *                     2: AMD-K5, PR 166
       *                     3: AMD-K5, PR 200
       *                     6: AMD-K6, PR 166 ... 266
       *                     7: AMD-K6, PR 166 ... 300
       *                     8: AMD-K6-2, PR 200 ... 450
       *                     9: AMD-K6-III, PR 200 ... 450
       *          6: model = 1: AMD Athlon
       *                     2: AMD Athlon
       *                     4: AMD Athlon
       */

      if (outfile != NULL)
	{
	  switch (family)
	    {
	      case 4:
	        fprintf(outfile, "    AMD %s\n", AMD_4_Model[model]);
	        break;
	      case 5:
	        fprintf(outfile, "    AMD %s\n", AMD_5_Model[model]);
	        break;
	      case 6:
	        fprintf(outfile, "    AMD %s\n", AMD_6_Model[model]);
	        break;
	      default:
	        break;
	    } /* switch (family) */
	}
    }	/* amd */

  if (cyrix)
    {
      /* level	type, family, model
       *		Cx486SLC	(1)
       *		Cx486DLC	(1)
       *		Cx486SRx2	(1)
       *		Cx486DRx2	(1)
       *		Cx486S		(1)
       *		Cx486DX		(1)
       *		Cx486DX2	(1)
       *	049	5x86		(1) (like i486DX4)
       *   1	052	6x86		(2)
       *   1	052	6x86L		(2)
       *   1	044	MediaGX		(2)
       *   1	060	6x86MX		(3)
       *   1	060	MII		(3)
       *   2	054	GXm		(3) (4)
       *   2	065	VIA Cyrix III	(3) (4)
       *		    (1) no cpuid instruction
       *		    (2) cpuid initially off
       *		    (3) cpuid initially on
       *		    (4) cpuid extended levels
       */

      Fprintf(outfile, "  Cyrix information not yet implemented\n");
      ret = FALSE;
    }	/* cyrix */

  if (transmeta)
    {
      Fprintf(outfile, "  Transmeta information not yet implemented\n");
      ret = FALSE;
    }	/* transmeta */

  if (umc)
    {
      /* family = 4: model 1, U5D; 2, U5S
       *          5:
       *          6:
       */
      Fprintf(outfile,
	  "  United Microelectronics Corp. information not yet implemented\n");
      ret = FALSE;
    }	/* umc */	/* a 486 clone from Taiwan */

  if (nexgen)
    {
      /* family = 4:
       *          5: model 0, Nx586
       *          6:
       */
      Fprintf(outfile, "  NexGen information not yet implemented\n");
      ret = FALSE;
    }	/* nexgen */	/* now part of AMD */

  if (centaur)
    {
      /* The Centaur/IDT vendor id is programmable and unreliable;
       * see Mazur's web page for an identification algorithm.
       * Extended levels 0x80000000 .. 0x80000004 are supported.
       * AMD's 3DNow! instructions are supported.
       */

      /* family = 4:
       *          5: model 4, C6; model 8, C2; model 9, C3
       *          6:
       */
      Fprintf(outfile, "  IDT Centaur information not yet implemented\n");
      ret = FALSE;
    }	/* centaur */

  if (rise)
    {
      /* family = 4:
       *          5: mP6 (model 0, 0.25 micron; model 2, 0.18 micron)
       *          6:
       */
      Fprintf(outfile, "  Rise Technology information not yet implemented\n");
      ret = FALSE;
    }	/* rise */


  Fprintf(outfile, "  Cycle and event counters, access method\n");

  /* verify that the Time Stamp Counter is implemented */
  if (features & 0x10)		/* TSC flag, bit 4 */
    {
      /*
       * The Time Stamp Counter is implemented.
       * Now check the bit CR4.TSD (Time Stamp Disable) to verify that
       *   the rdtsc instruction will work from user mode (CPL = 3).
       * CR4.TSD is bit 2 of CR4.  It is cleared at system reset or init.
       * CR4.TSD = 1 --> can use rdtsc only when CPL = 0.
       * CR4.TSD = 0 --> can use rdtsc from any CPL.
       * but, CR4 itself can only be read or changed when CPL = 0.
       */

      if (devpmc != -1)
	{
	  if (lseek(devpmc, PMC_QUERY_RDTSC, SEEK_SET) == 1)  /* CR4.TSD */
	    {
	      Fprintf(outfile, "    rdtsc instruction from CPL = 0 only\n");
	      ret = FALSE;
	    }
	  else
	    { Fprintf(outfile, "    rdtsc instruction from any CPL\n"); }
	}
      else
	{ Fprintf(outfile, "    no information from /dev/pmc about rdtsc\n"); }
    }
  else
    {
      Fprintf(outfile,
	  "  Time Stamp Counter not implemented on this processor\n");
      ret = FALSE;
    }

  /* verify that the Model-Specific Registers are implemented */
  if (features & 0x20)		/* MSR flag, bit 5 */
    {
#ifdef PMC_READ_KERNEL_MODE
      /* rdmsr instruction: Pentium, Pentium/MMX, Pentium Pro/II/III */

      Fprintf(outfile, "    rdmsr instruction from CPL = 0 only\n");
#else
      /* rdpmc instruction: Pentium/MMX, Pentium Pro/II/III */

      /*
       * The Model-Specific Registers are implemented.
       * Now check the bit CR4.PCE (Performance-Monitoring Counter Enable)
       *   to verify that the rdpmc instruction will work from user mode
       *   (CPL = 3).
       * CR4.PCE is bit 8 of CR4.  It is cleared at system reset or init.
       * CR4.PCE = 0 --> can use rdpmc only when CPL = 0.
       * CR4.PCE = 1 --> can use rdpmc from any CPL.
       * but, CR4 itself can only be read or changed when CPL = 0.
       *
       * See the Pentium Family Developer's Manual (1997) and the Pentium
       * Pro Specification Update if System Management Mode is used with
       * CR4.PCE = 1.  The processor will erroneously enter shutdown state
       * when returning from SMM.  This apparently occurs only with cpuid
       * signatures (P5) [02]544, 154[34], (P6) 061[1267].  The problems
       * with SMM relative to the TSC and PMC's are far more extensive
       * than just this one bit.
       */

      if (devpmc != -1)
	{
	  if (lseek(devpmc, PMC_QUERY_RDPMC, SEEK_SET) == 0)  /* CR4.PCE */
	    {
	      Fprintf(outfile,
	             "    rdpmc instruction from CPL = 0 only\n"
	             "      (you could run pmc_query and then pmc_enable\n"
	             "       or recompile with -DPMC_READ_KERNEL_MODE)\n");
	      ret = FALSE;
	    }
	  else
	    { Fprintf(outfile, "    rdpmc instruction from any CPL\n"); }
	}
      else
	{ Fprintf(outfile, "    no information from /dev/pmc about rdpmc\n"); }
#endif	/* PMC_READ_KERNEL_MODE */
    }
  else
    {
      Fprintf(outfile, "  Model-Specific Registers not implemented on this processor\n");
      ret = FALSE;
    }


  /* Intel cache configuration descriptors */
  if (intel && level >= 2)
    {
      pmc_uint32_t Config[4], Conf[16], f, g, trials = 1;
      Fprintf(outfile, "  Cache configuration\n");
      while (trials > 0)  /* this is not supposed to be an infinite loop */
	{
	  /* a multiprocessor should not change processors in this loop */

	  read_cpuid(2, &Config[0], &Config[1], &Config[2], &Config[3]);
	  trials = (Config[0] & 0xff) - 1;	/* AL register */
	  Config[0] &= 0xffffff00;		/* clear AL */
	  for (i = 0; i < 16; i++)
	    { Conf[i] = 0; }
	  for (i = 0, j = 0; i < 4; i++)
	    {
	      if (Config[i] & 0x80000000)	/* validity check */
		{ continue; }
	      for (g = Config[i]; g ; g >>= 8)
		{
		  f = g & 0xff;   /* descriptor code */
		  if (f)
		    {
		      /* insertion sort, cannot overflow Conf[] */
		      for (k = j++; 0 < k && f < Conf[k-1]; k--)
			{ Conf[k] = Conf[k-1]; }
		      Conf[k] = f;
		    }
		}
	    }
	  for (i = 0, k = 0; i < j; i++)
	    {
	      f = Conf[i];
	      for ( ; k < size_Intel_Cache; k++)
		{
		  if (f == Intel_Cache[k].code)
		    {
		      L1_i_size += Intel_Cache[k].L1_i_size;
		      L1_d_size += Intel_Cache[k].L1_d_size;
		      L2_size += Intel_Cache[k].L2_size;
		      L3_size += Intel_Cache[k].L3_size;
		      Fprintf(outfile, "    %s\n", Intel_Cache[k++].descriptor);
		      break;
		    }
		}
	    }
	}
    }

  if (intel && level >= 1)
    {
      pmc_uint32_t f;
      Fprintf(outfile, "  Indicated features\n");
      for (i = 0, f = 1; i < 32; i++, f <<= 1)
	{
	  if (features & f)
	    {
	      if (i == 11 && signature < 0x633)
		{
		  /* Fast System Call feature not actually implemented
		   * on Pentium Pro
		   */
		  continue;
		}
	      Fprintf(outfile, "    %2d %s\n", i, Intel_Feature[i]);
	    }
	}
    }

  if (amd && extended_level >= 0x80000005)
    {
      /* level 1 cache */
      pmc_uint32_t TLBm, TLB, L1i, L1d, t;

      read_cpuid(0x80000005, &TLBm, &TLB, &L1d, &L1i);
      /* TLBm is for 2/4 MB pages, TLB is for 4 KB pages */

      Fprintf(outfile, "  Cache configuration\n");

      /* Athlon only */
      Fprintf(outfile, "    L1 instr TLB, 4M page, ");
      if ((t = (TLBm >> 8) & 0xff) == 0xff)
	{ Fprintf(outfile, amd_full); }
      else
	{ Fprintf(outfile, amd_nway, t); }
      Fprintf(outfile, amd_entries, TLBm & 0xff);

      /* Athlon only */
      Fprintf(outfile, "    L1 data  TLB, 4M page, ");
      if ((t = (TLBm >> 24) & 0xff) == 0xff)
	{ Fprintf(outfile, amd_full); }
      else
	{ Fprintf(outfile, amd_nway, t); }
      Fprintf(outfile, amd_entries, (TLBm >> 16) & 0xff);

      Fprintf(outfile, "    L1 instr TLB, 4K page, ");
      if ((t = (TLB >> 8) & 0xff) == 0xff)
	{ Fprintf(outfile, amd_full); }
      else
	{ Fprintf(outfile, amd_nway, t); }
      Fprintf(outfile, amd_entries, TLB & 0xff);

      Fprintf(outfile, "    L1 data  TLB, 4K page, ");
      if ((t = (TLB >> 24) & 0xff) == 0xff)
	{ Fprintf(outfile, amd_full); }
      else
	{ Fprintf(outfile, amd_nway, t); }
      Fprintf(outfile, amd_entries, (TLB >> 16) & 0xff);

      Fprintf(outfile, "    L1 instr cache, %ld KB, ", t = (L1i >> 24) & 0xff);
      L1_i_size += t;
      if ((t = (L1i >> 16) & 0xff) == 0xff)
	{ Fprintf(outfile, amd_full); }
      else
	{ Fprintf(outfile, amd_nway, t); }
      Fprintf(outfile, amd_lines, (L1i >> 8) & 0xff, L1i & 0xff);

      Fprintf(outfile, "    L1 data  cache, %ld KB, ", t = (L1d >> 24) & 0xff);
      L1_d_size += t;
      if ((t = (L1d >> 16) & 0xff) == 0xff)
	{ Fprintf(outfile, amd_full); }
      else
	{ Fprintf(outfile, amd_nway, t); }
      Fprintf(outfile, amd_lines, (L1d >> 8) & 0xff, L1d & 0xff);
    }

  if (amd && extended_level >= 0x80000006)
    {
      /* level 2 cache */
      pmc_uint32_t TLBm, TLB, L2, t = 0xbabef00d;

      read_cpuid(0x80000006, &TLBm, &TLB, &L2, &dummy);
      /* TLBm is for 2/4 MB pages, TLB is for 4 KB pages */

      /* Athlon only */
      if (TLBm & 0xffff0000)
	{
	  /* split TLB */
	  Fprintf(outfile, "    L2 instr TLB, 4M page, ");
	  if ((t = (TLBm >> 12) & 0xf) == 0xf)
	    { Fprintf(outfile, amd_full); }
	  else
	    { Fprintf(outfile, amd_nway, t); }
	  Fprintf(outfile, amd_entries, TLBm & 0xfff);

	  Fprintf(outfile, "    L2 data  TLB, 4M page, ");
	  if ((t = (TLBm >> 28) & 0xf) == 0xf)
	    { Fprintf(outfile, amd_full); }
	  else
	    { Fprintf(outfile, amd_nway, t); }
	  Fprintf(outfile, amd_entries, (TLBm >> 16) & 0xfff);
	}
      else
      if (TLBm & 0xffff)
	{
	  /* unified TLB */
	  Fprintf(outfile, "    L2 unified TLB, 4M page, ");
	  if ((t = (TLBm >> 12) & 0xf) == 0xf)
	    { Fprintf(outfile, amd_full); }
	  else
	    { Fprintf(outfile, amd_nway, t); }
	  Fprintf(outfile, amd_entries, TLBm & 0xfff);
	}

      /* Athlon only */
      if (TLB & 0xffff0000)
	{
	  /* split TLB */
	  Fprintf(outfile, "    L2 instr TLB, 4K page, ");
	  if ((t = (TLB >> 12) & 0xf) == 0xf)
	    { Fprintf(outfile, amd_full); }
	  else
	    { Fprintf(outfile, amd_nway, t); }
	  Fprintf(outfile, amd_entries, TLB & 0xfff);

	  Fprintf(outfile, "    L2 data  TLB, 4K page, ");
	  if ((t = (TLB >> 28) & 0xf) == 0xf)
	    { Fprintf(outfile, amd_full); }
	  else
	    { Fprintf(outfile, amd_nway, t); }
	  Fprintf(outfile, amd_entries, (TLB >> 16) & 0xfff);
	}
      else
      if (TLB & 0xffff)
	{
	  /* unified TLB */
	  Fprintf(outfile, "    L2 unified TLB, 4K page, ");
	  if ((t = (TLB >> 12) & 0xf) == 0xf)
	    { Fprintf(outfile, amd_full); }
	  else
	    { Fprintf(outfile, amd_nway, t); }
	  Fprintf(outfile, amd_entries, TLB & 0xfff);
	}

      /* K6-III, Athlon only */
      Fprintf(outfile, "    L2 unified cache, %ld KB, ", t = (L2 >> 16) & 0xffff);
      L2_size += t;
      if ((t = (L2 >> 12) & 0xf) == 0xf)
	{ Fprintf(outfile, amd_full); }
      else
	{ Fprintf(outfile, amd_nway, t); }
      Fprintf(outfile, amd_lines, (L2 >> 8) & 0xf, L2 & 0xff);
    }

  if (amd && level >= 1)
    {
      pmc_uint32_t f, g = (features | extended_features);
      Fprintf(outfile, "  Indicated features\n");
      for (i = 0, f = 1; i < 32; i++, f <<= 1)
	{
	  if (i == 11)
	    {
	      if (features & f)
	        { Fprintf(outfile, "    11 Sysenter/Sysexit Instructions\n"); }
	      if (extended_features & f)
	        { Fprintf(outfile, "    11 Syscall/Sysret Instructions\n"); }
	    }
	  else if (g & f)
	    { Fprintf(outfile, "    %2d %s\n", i, AMD_Feature[i]); }

	}
    }

  /* Cyrix cache configuration descriptors */
  if (cyrix && level >= 2)
    {
      /* TLB information in eax, L1 cache information in edx */
      pmc_uint32_t TLB, L1;
      Fprintf(outfile, "  Cache configuration\n");
      read_cpuid(2, &TLB, &dummy, &dummy, &L1);
      /* TLB & 0xff, trials field == 1
       * (TLB >> 8) & 0xff, code 70 = 32-entry, 4-way, 4 KB pages
       * L1 & 0xff, code 80 = 16 KB, 4-way, 16 bytes per line
       */
      /* ... */
      L1_d_size = 16;	/* it is actually a unified cache */
    }

  if (cyrix && extended_level >= 0x80000005)
    {
      pmc_uint32_t TLB, L1;
      Fprintf(outfile, "  Cache configuration, extended information\n");
      read_cpuid(0x80000005, &dummy, &TLB, &L1, &dummy);
      /* TLB & 0xff, trials field == 1
       * (TLB >> 8) & 0xff, code 70 = 32-entry, 4-way, 4 KB pages
       * L1 & 0xff, code 80 = 16 KB, 4-way, 16 bytes per line
       */
      /* ... */
    }

  if (cyrix && level >= 1)
    {
      Fprintf(outfile, "  Indicated features\n");
      /* standard features in Intel_Feature[]
       * extended features in Cyrix_Feature[]
       */
      /* ... */
    }

#ifdef PMC_SERIAL_NUMBER
  if (intel && level >= 3)
    {
      /* Verify that the Processor Serial Number is implemented and enabled.
       * This feature is supported only by the Pentium III.
       * See Intel's AP-909 for instructions on how to disable this feature;
       * it is usually disabled in Linux.
       */
      if (features & 0x40000)		/* PN flag, bit 18 */
	{
	  /* 96 bits from signature : edx : ecx */
	  pmc_uint32_t middle = 0, lower = 0;
	  read_cpuid(3, &dummy, &dummy, &lower, &middle);
	  /* print according to Intel's specification in AP-485 and AP-909 */
	  Fprintf(outfile,
	    "  processor serial number %04lX-%04lX-%04lX-%04lX-%04lX-%04lX\n",
	    (signature >> 16) & 0xffff, signature & 0xffff,
	    (middle    >> 16) & 0xffff, middle    & 0xffff,
	    (lower     >> 16) & 0xffff, lower     & 0xffff
	    );
	}
    }
#endif

  PMC_VERBOSE_OUT(pmc_verify_processor)

  return ret;
}

/*----------------------------------------------------------------------------*/

/* private interface */

#if defined(PMC_CHECK_ALIGNMENT)
/*
 * The pmc_data_t type is 32 bytes, so it fully occupies one cache line
 * IF it is padded and aligned properly.
 *
 * Without using __attribute__ (( aligned(x) )) in pmc_lib.h [-DPMC_UNALIGNED]
 *
 *    size and alignment of various types
 *      pmc_uint32_t         4       4
 *      pmc_uint64_t         8       4
 *      pmc_data_t          32       4
 *      pmc_counter_t      320       4
 *
 *    size of various objects
 *                                                        forced align
 *                              static          static          static
 *                      global  global   local   local   local   local
 *      pmc_uint32_t         4       4       4       4       4       4
 *      pmc_uint64_t         8       8       8       8       8       8
 *      pmc_data_t          32      32      32      32      32      32
 *      pmc_counter_t      320     320     320     320     320     320
 *
 *    alignment of various objects
 *                                                        forced align
 *                              static          static          static
 *                      global  global   local   local   local   local
 *      pmc_uint32_t         4       4       4       4       4       4
 *      pmc_uint64_t         4       4       4       4       4       4
 *      pmc_data_t           4       4       4       4       4       4
 *      pmc_counter_t        4       4       4       4       4       4
 *
 * With __attribute__ (( aligned(x) )) in pmc_lib.h
 *
 *    size and alignment of various types
 *      pmc_uint32_t         4       4
 *      pmc_uint64_t         8       8
 *      pmc_data_t          32      32
 *      pmc_counter_t      320      32
 *
 *    size of various objects
 *                                                        forced align
 *                              static          static          static
 *                      global  global   local   local   local   local
 *      pmc_uint32_t         4       4       4       4       4       4
 *      pmc_uint64_t         8       8       8       8       8       8
 *      pmc_data_t          32      32      32      32      32      32
 *      pmc_counter_t      320     320     320     320     320     320
 *
 *    alignment of various objects
 *                                                        forced align
 *                              static          static          static
 *                      global  global   local   local   local   local
 *      pmc_uint32_t         4       4       4       4       4       4
 *      pmc_uint64_t         8       8       8       8       8       8
 *      pmc_data_t          32      32       4      32       4      32
 *      pmc_counter_t       32      32       4      32       4      32
 */

/* M: element count of every test array declared for the alignment check. */
#define M 2

/*
 * File-scope objects examined by pmc_verify_alignment().
 *
 * For each of the four types there is one external ("global") object and
 * one static object; their sizeof and __alignof__ values are printed in the
 * "global" and "static global" columns of the report.
 *
 * NOTE(review): the single-char objects x0 .. x7 are not referenced by
 * pmc_verify_alignment(); presumably they are interleaved only to disturb
 * the natural placement of the object that follows -- confirm before
 * removing or reordering anything here.
 */
char x0;
pmc_uint32_t a32;
static char x1;
static pmc_uint32_t b32;

char x2;
pmc_uint64_t a64;
static char x3;
static pmc_uint64_t b64;

char x4;
pmc_data_t ad;
static char x5;
static pmc_data_t bd;

char x6;
pmc_counter_t ac;
static char x7;
static pmc_counter_t bc;

/* pmc_counter_t scalars and arrays whose addresses are tested against the
 * cache-line mask in pmc_verify_alignment() */
pmc_counter_t r;
pmc_counter_t ra[M];
static pmc_counter_t s;
static pmc_counter_t sa[M];

/* the same set for pmc_data_t */
pmc_data_t rd;
pmc_data_t rda[M];
static pmc_data_t sd;
static pmc_data_t sda[M];

/*
 * pmc_verify_alignment -- diagnostic report on type and object alignment.
 *
 * Written to outfile:
 *   - sizeof and __alignof__ of pmc_uint32_t, pmc_uint64_t, pmc_data_t and
 *     pmc_counter_t;
 *   - sizeof and __alignof__ of one object of each type in six variants:
 *     global, static global, local, static local, and local / static local
 *     declared with an explicit PMC_ALIGN();
 *   - a "warning" line for each pmc_counter_t / pmc_data_t object (global,
 *     static, local, array element, or obtained from pmc_counter_alloc() /
 *     pmc_data_alloc()) whose address is not a multiple of PMC_CACHE_LINE,
 *     and one if sizeof(pmc_data_t) is neither one nor two cache lines.
 *
 * prog is not used.  The return value is always TRUE: misalignment is only
 * reported, it does not change the result.
 *
 * sizeof and __alignof__ yield size_t, which does not match a %d
 * conversion, so the values are converted to int before printing.
 * Addresses are converted to unsigned long and printed with %lx instead of
 * being narrowed to unsigned int.
 */

/* helpers local to pmc_verify_alignment(); removed again after the function */
#define VA_SIZE(x)		((int) sizeof(x))
#define VA_ALIGN(x)		((int) __alignof__(x))
#define VA_ADDR(p)		((unsigned long) (p))
#define VA_MISALIGNED(p)	((VA_ADDR(p) & mask) != 0)   /* uses local mask */

int pmc_verify_alignment(FILE * outfile, const char * const prog)
{
  int ret = TRUE;

  /* NOTE(review): the single-char locals below are never read; presumably
   * they exist only to disturb the placement of the object declared right
   * after them.  Keep the declaration order as it is.
   */

  /* plain local and static local objects */
  char x8;
  pmc_uint32_t c32;
  static char x12;
  static pmc_uint32_t d32;

  char x9;
  pmc_uint64_t c64;
  static char x13;
  static pmc_uint64_t d64;

  char x10;
  pmc_data_t cd;
  static char x14;
  static pmc_data_t dd;

  char x11;
  pmc_counter_t cc;
  static char x15;
  static pmc_counter_t dc;

  /* the same objects again, with an explicit alignment request */
  char y8;
  pmc_uint32_t e32 PMC_ALIGN(sizeof(pmc_uint32_t));
  static char y12;
  static pmc_uint32_t f32 PMC_ALIGN(sizeof(pmc_uint32_t));

  char y9;
  pmc_uint64_t e64 PMC_ALIGN(sizeof(pmc_uint64_t));
  static char y13;
  static pmc_uint64_t f64 PMC_ALIGN(sizeof(pmc_uint64_t));

  char y10;
  pmc_data_t ed PMC_ALIGN(PMC_CACHE_LINE);
  static char y14;
  static pmc_data_t fd PMC_ALIGN(PMC_CACHE_LINE);

  char y11;
  pmc_counter_t ec PMC_ALIGN(PMC_CACHE_LINE);
  static char y15;
  static pmc_counter_t fc PMC_ALIGN(PMC_CACHE_LINE);

  /* local objects whose addresses are tested against the cache line */
  pmc_counter_t t;
  pmc_counter_t ta[M];
  pmc_counter_t * p = NULL;

  pmc_data_t td;
  pmc_data_t tda[M];
  pmc_data_t * pd = NULL;

  int i;
  pmc_uint32_t mask = PMC_CACHE_LINE - 1;
	/* assuming PMC_CACHE_LINE is a power of 2 */

  PMC_VERBOSE_IN(pmc_verify_alignment)

  /* ---- table 1: the types themselves ---- */

  fprintf(outfile, "size and alignment of various types\n");
  fprintf(outfile, "  pmc_uint32_t\t%8d%8d\n",
    VA_SIZE(pmc_uint32_t),  VA_ALIGN(pmc_uint32_t));
  fprintf(outfile, "  pmc_uint64_t\t%8d%8d\n",
    VA_SIZE(pmc_uint64_t),  VA_ALIGN(pmc_uint64_t));
  fprintf(outfile, "  pmc_data_t\t%8d%8d\n",
    VA_SIZE(pmc_data_t),    VA_ALIGN(pmc_data_t));
  fprintf(outfile, "  pmc_counter_t\t%8d%8d\n",
    VA_SIZE(pmc_counter_t), VA_ALIGN(pmc_counter_t));
  fprintf(outfile, "\n");

  /* ---- table 2: object sizes ---- */

  fprintf(outfile, "size of various objects\n");
  fprintf(outfile, "\t\t                                    forced align\n");
  fprintf(outfile, "\t\t          static          static          static\n");
  fprintf(outfile, "\t\t  global  global   local   local   local   local\n");
  fprintf(outfile,
    "  pmc_uint32_t\t%8d%8d%8d%8d%8d%8d\n",
    VA_SIZE(a32), VA_SIZE(b32),
    VA_SIZE(c32), VA_SIZE(d32),
    VA_SIZE(e32), VA_SIZE(f32));
  fprintf(outfile,
    "  pmc_uint64_t\t%8d%8d%8d%8d%8d%8d\n",
    VA_SIZE(a64), VA_SIZE(b64),
    VA_SIZE(c64), VA_SIZE(d64),
    VA_SIZE(e64), VA_SIZE(f64));
  fprintf(outfile,
    "  pmc_data_t\t%8d%8d%8d%8d%8d%8d\n",
    VA_SIZE(ad ), VA_SIZE(bd ),
    VA_SIZE(cd ), VA_SIZE(dd ),
    VA_SIZE(ed ), VA_SIZE(fd ));
  fprintf(outfile,
    "  pmc_counter_t\t%8d%8d%8d%8d%8d%8d\n",
    VA_SIZE(ac ), VA_SIZE(bc ),
    VA_SIZE(cc ), VA_SIZE(dc ),
    VA_SIZE(ec ), VA_SIZE(fc ));
  fprintf(outfile, "\n");

  /* ---- table 3: object alignments ---- */

  fprintf(outfile, "alignment of various objects\n");
  fprintf(outfile, "\t\t                                    forced align\n");
  fprintf(outfile, "\t\t          static          static          static\n");
  fprintf(outfile, "\t\t  global  global   local   local   local   local\n");
  fprintf(outfile,
    "  pmc_uint32_t\t%8d%8d%8d%8d%8d%8d\n",
    VA_ALIGN(a32), VA_ALIGN(b32),
    VA_ALIGN(c32), VA_ALIGN(d32),
    VA_ALIGN(e32), VA_ALIGN(f32));
  fprintf(outfile,
    "  pmc_uint64_t\t%8d%8d%8d%8d%8d%8d\n",
    VA_ALIGN(a64), VA_ALIGN(b64),
    VA_ALIGN(c64), VA_ALIGN(d64),
    VA_ALIGN(e64), VA_ALIGN(f64));
  fprintf(outfile,
    "  pmc_data_t\t%8d%8d%8d%8d%8d%8d\n",
    VA_ALIGN(ad ), VA_ALIGN(bd ),
    VA_ALIGN(cd ), VA_ALIGN(dd ),
    VA_ALIGN(ed ), VA_ALIGN(fd ));
  fprintf(outfile,
    "  pmc_counter_t\t%8d%8d%8d%8d%8d%8d\n",
    VA_ALIGN(ac ), VA_ALIGN(bc ),
    VA_ALIGN(cc ), VA_ALIGN(dc ),
    VA_ALIGN(ec ), VA_ALIGN(fc ));
  fprintf(outfile, "\n");

  /* ---- pmc_counter_t: actual addresses versus the cache line ---- */

  if (VA_MISALIGNED(&r))
    {
      fprintf(outfile, "warning,"
	" global pmc_counter_t alignment (0x%08lx)\n", VA_ADDR(&r));
    }

  if (VA_MISALIGNED(&s))
    {
      fprintf(outfile, "warning,"
	" static pmc_counter_t alignment (0x%08lx)\n", VA_ADDR(&s));
    }

  if (VA_MISALIGNED(&t))
    {
      fprintf(outfile, "warning,"
	"  local pmc_counter_t alignment (0x%08lx)\n", VA_ADDR(&t));
    }

  for (i = 0; i < M; i++)
    {
      if (VA_MISALIGNED(&ra[i]))
	{
	  fprintf(outfile, "warning,"
	    " global pmc_counter_t array[%d] alignment (0x%08lx)\n",
	    i, VA_ADDR(&ra[i]));
	}

      if (VA_MISALIGNED(&sa[i]))
	{
	  fprintf(outfile, "warning,"
	    " static pmc_counter_t array[%d] alignment (0x%08lx)\n",
	    i, VA_ADDR(&sa[i]));
	}

      if (VA_MISALIGNED(&ta[i]))
	{
	  fprintf(outfile, "warning,"
	    "  local pmc_counter_t array[%d] alignment (0x%08lx)\n",
	    i, VA_ADDR(&ta[i]));
	}
    }

  /* NOTE(review): the block obtained here is not released in this
   * function; no matching release routine is visible from here.
   */
  p = pmc_counter_alloc(M);

  if (VA_MISALIGNED(p))
    {
      fprintf(outfile, "warning,"
	" dynamic pmc_counter_t alignment by pmc_counter_alloc() (0x%08lx)\n",
	VA_ADDR(p));
    }

  /* ---- pmc_data_t: size, then actual addresses ---- */

  if (sizeof(pmc_data_t) != PMC_CACHE_LINE && sizeof(pmc_data_t) != 2*PMC_CACHE_LINE)
    {
      fprintf(outfile, "warning,"
	" pmc_data_t size\n");
    }

  if (VA_MISALIGNED(&rd))
    {
      fprintf(outfile, "warning,"
	" global pmc_data_t alignment (0x%08lx)\n", VA_ADDR(&rd));
    }

  if (VA_MISALIGNED(&sd))
    {
      fprintf(outfile, "warning,"
	" static pmc_data_t alignment (0x%08lx)\n", VA_ADDR(&sd));
    }

  if (VA_MISALIGNED(&td))
    {
      fprintf(outfile, "warning,"
	"  local pmc_data_t alignment (0x%08lx)\n", VA_ADDR(&td));
    }

  for (i = 0; i < M; i++)
    {
      if (VA_MISALIGNED(&rda[i]))
	{
	  fprintf(outfile, "warning,"
	    " global pmc_data_t array[%d] alignment (0x%08lx)\n",
	    i, VA_ADDR(&rda[i]));
	}

      if (VA_MISALIGNED(&sda[i]))
	{
	  fprintf(outfile, "warning,"
	    " static pmc_data_t array[%d] alignment (0x%08lx)\n",
	    i, VA_ADDR(&sda[i]));
	}

      if (VA_MISALIGNED(&tda[i]))
	{
	  fprintf(outfile, "warning,"
	    "  local pmc_data_t array[%d] alignment (0x%08lx)\n",
	    i, VA_ADDR(&tda[i]));
	}
    }

  /* NOTE(review): as with p, this block is not released here. */
  pd = pmc_data_alloc(M);

  if (VA_MISALIGNED(pd))
    {
      fprintf(outfile, "warning,"
	" dynamic pmc_data_t alignment by pmc_data_alloc() (0x%08lx)\n",
	VA_ADDR(pd));
    }

  PMC_VERBOSE_OUT(pmc_verify_alignment)

  return ret;
}

#undef VA_MISALIGNED
#undef VA_ADDR
#undef VA_ALIGN
#undef VA_SIZE
#else	/* PMC_CHECK_ALIGNMENT */
/*
 * Stand-in compiled when PMC_CHECK_ALIGNMENT is not defined.
 * No alignment checks are made, neither argument is referenced directly,
 * and the result is always TRUE.
 */
int pmc_verify_alignment(FILE * outfile, const char * const prog)
{
  /* NOTE(review): presumably emits a trace line in verbose builds --
   * confirm against the definition of PMC_VERBOSE_.
   */
  PMC_VERBOSE_([inactive] pmc_verify_alignment)

  return TRUE;
}
#endif	/* PMC_CHECK_ALIGNMENT */

/*----------------------------------------------------------------------------*/

static int closest_Speed(double est)
{
  int i, s = 0;
  double speed;

  for (i = 0; i < size_Speed; i++)
    {
      /* come within 2.5% of a tabulated speed */
      speed = (double) Speed[i];
      if (fabs(est - speed) < 0.025 * speed)
	{ s = Speed[i]; break; }
    }

  return s;
}

/*----------------------------------------------------------------------------*/

/* private interface */

/* with the -Verbose option,
 *   output to stdout instead of stderr; see pmc_getargs.c for usage
 *   use the largest estimate by several methods
 */

#define rdtsc(a) PMC_ASM_READ_TSC(a)

/*
 * Estimate the processor clock rate in MHz.
 *
 * The time stamp counter is read before and after a delay of about
 * `seconds` seconds; the cycle count divided by the elapsed time gives an
 * estimate, which closest_Speed() maps onto a tabulated speed (0 if no
 * tabulated speed is close enough).
 *
 * Without PMC_VERBOSE only sleep() is used as the delay.  With PMC_VERBOSE
 * the measurement is repeated with usleep(), nanosleep(), time() and
 * gettimeofday(), each raw estimate is printed to stdout, and the largest
 * tabulated result is kept.
 *
 * Returns the chosen speed, or 0 if seconds <= 0 or nothing matched.
 */
int pmc_guess_mhz(const int seconds)
{
  pmc_uint64_t tsc_start, tsc_stop;
  double estimate;
  int candidate;
  int best = 0;

  PMC_VERBOSE_IN(pmc_guess_mhz)

  if (seconds > 0)
    {
      /* unsigned int sleep(unsigned int seconds); */
      {
        const unsigned int wanted = (unsigned int) seconds;
        unsigned int unslept;

        rdtsc(tsc_start);
        unslept = sleep(wanted);
        rdtsc(tsc_stop);

        /* skip the estimate if no whole second was actually slept */
        if (unslept != wanted)
          {
            estimate = (double)(tsc_stop.bits64 - tsc_start.bits64)
                     / (double)(wanted - unslept)
                     / 1000000.0;

#ifdef PMC_VERBOSE
            printf("  est by sleep(%d) [%d remaining] = %f\n", wanted, unslept, estimate);
#endif

            candidate = closest_Speed(estimate);
            if (candidate > best)
              { best = candidate; }
          }
      }

#ifdef PMC_VERBOSE
      /* void usleep(unsigned long usec); */
      /* No feedback about the actual delay is available here, so this
       * result could be misleading.
       */
      {
        const unsigned long micros =
          (unsigned long)1000000 * (unsigned long)seconds;

        rdtsc(tsc_start);
        usleep(micros);
        rdtsc(tsc_stop);

        /* cycles per microsecond is already MHz */
        estimate = (double)(tsc_stop.bits64 - tsc_start.bits64)
                 / (double)micros;

        printf("  est by usleep(%d) = %f\n", (int)micros, estimate);

        candidate = closest_Speed(estimate);
        if (candidate > best)
          { best = candidate; }
      }

      /* int nanosleep(const struct timespec *req, struct timespec *rem); */
      /* struct timespec { time_t tv_sec; long tv_nsec; }; */
      {
        struct timespec want = {0, 0}, left = {0, 0};

        want.tv_sec = (time_t) seconds;

        rdtsc(tsc_start);
        nanosleep(&want, &left);
        rdtsc(tsc_stop);

        /* usable only if at least part of a second has elapsed */
        if (left.tv_sec != want.tv_sec)
          {
            estimate = (double)(tsc_stop.bits64 - tsc_start.bits64)
                     / ( (double)(want.tv_sec - left.tv_sec)
                       + (double)(want.tv_nsec - left.tv_nsec)/1000000000.0
                       )
                     / 1000000.0;

            printf("  est by nanosleep(%d) [%ld.%06ld remaining] = %f\n",
              seconds, left.tv_sec, left.tv_nsec/1000, estimate);

            candidate = closest_Speed(estimate);
            if (candidate > best)
              { best = candidate; }
          }
      }

      /* time_t time(time_t *t); */
      {
        const time_t first = time(NULL);
        time_t deadline;

        /* spin until the clock ticks over to a new second */
        do
          { deadline = time(NULL); }
        while (deadline == first);

        /* the measurement ends `seconds` ticks later */
        deadline += (time_t) seconds;

        rdtsc(tsc_start);
        while (time(NULL) < deadline)
          { /* busy wait */ }
        rdtsc(tsc_stop);

        estimate = (double)(tsc_stop.bits64 - tsc_start.bits64)
                 / (double)seconds
                 / 1000000.0;

        printf("  est by time() = %f\n", estimate);

        candidate = closest_Speed(estimate);
        if (candidate > best)
          { best = candidate; }
      }

      /* int gettimeofday(struct timeval *tv, struct timezone *tz); */
      /* struct timeval { long tv_sec; long tv_usec; }; */
      {
        struct timeval first, now;
        long stop_sec;

        /* spin until the seconds field changes */
        gettimeofday(&first, NULL);
        for (;;)
          {
            gettimeofday(&now, NULL);
            if (first.tv_sec != now.tv_sec)
              { break; }
          }

        /* the measurement ends `seconds` later */
        stop_sec = now.tv_sec + (long) seconds;

        rdtsc(tsc_start);
        for (;;)
          {
            gettimeofday(&now, NULL);
            if (!(stop_sec > now.tv_sec))
              { break; }
          }
        rdtsc(tsc_stop);

        estimate = (double)(tsc_stop.bits64 - tsc_start.bits64)
                 / (double)seconds
                 / 1000000.0;

        printf("  est by gettimeofday() = %f\n", estimate);

        candidate = closest_Speed(estimate);
        if (candidate > best)
          { best = candidate; }
      }
#endif	/* PMC_VERBOSE */
    }

  PMC_VERBOSE_OUT(pmc_guess_mhz)

  return best;
}

/*----------------------------------------------------------------------------*/
