如果不能正常显示,请查看原文 , 或返回

ARMv8/aarch64下TSC(Time Stamp Counter)读取方法 – Linux Kernel Exploration

      在x86架构中,我们对Time Stamp Counter (TSC) 寄存器非常熟悉,通过这个寄存器对代码执行时间的衡量可精确到CPU Cycle级别。

      但在ARM/ARMv8/aarch64架构中,并没有与x86 TSC对应的寄存器和直接对应的汇编指令rdtsc。

      若想在ARMv8架构中,统计计算代码执行时间达到CPU Cycle级别,也需要读取类似x86的TSC寄存器。在ARMv8中,有Performance Monitors Control Register系列寄存器,其中PMCCNTR_EL0就类似于x86的TSC寄存器。本文介绍Linux下读取ARM TSC方法。

      读取这个PMCCNTR_EL0寄存器值,就可以知道当前CPU已运行了多少Cycle。但在ARM下读取CPU Cycle和x86有所不同:

    1、x86用户态代码可以随便读取TSC值。但在ARM下,默认情况下用户态不能读取,需要先在内核态使能,用户态才能读取。

       开关由寄存器PMUSERENR_EL0控制。实际上这个寄存器控制整个PMU寄存器组在用户态是否可读写,不仅仅是PMCCNTR_EL0。

      在内核态使能,可以编写单独的内核模块,也可以在内核中任意会被执行到的位置加上使能PMU寄存器的代码。下面是Linux下使能(Enable)用户态访问PMU的内核模块代码:

/*                                                                             
 * Enable user-mode ARM performance counter access.                            
 */                                                                            
#include                                                       
#include                                                       
#include                                                          
                                                                               

/* Option flags; not referenced by the functions in this listing. */
#define PERF_DEF_OPTS           (1 | 16)
#define PERF_OPT_RESET_CYCLES   (2 | 4)
#define PERF_OPT_DIV64          (8)

/*
 * PMCR_EL0 bit definitions.
 *
 * ARMV8_PMCR_MASK selects the bits armv8pmu_pmcr_write() is allowed to set.
 * It spans bits [6:0] so that ARMV8_PMCR_LC (bit 6) is not silently dropped
 * when a caller asks for the 64-bit cycle counter overflow mode.
 */
#define ARMV8_PMCR_MASK         0x7f
#define ARMV8_PMCR_E            (1 << 0) /* Enable all counters */
#define ARMV8_PMCR_P            (1 << 1) /* Reset all counters */
#define ARMV8_PMCR_C            (1 << 2) /* Cycle counter reset */
#define ARMV8_PMCR_D            (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV8_PMCR_X            (1 << 4) /* Export to ETM */
#define ARMV8_PMCR_DP           (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV8_PMCR_LC           (1 << 6) /* Cycle Counter 64bit overflow*/
#define ARMV8_PMCR_N_SHIFT      11       /* Number of counters supported */
#define ARMV8_PMCR_N_MASK       0x1f

/* PMUSERENR_EL0 bit definitions (user-mode access control). */
#define ARMV8_PMUSERENR_EN_EL0  (1 << 0) /* EL0 access enable */
#define ARMV8_PMUSERENR_CR      (1 << 2) /* Cycle counter read enable */
#define ARMV8_PMUSERENR_ER      (1 << 3) /* Event counter read enable */
                                                                                                        
static inline u32 armv8pmu_pmcr_read(void)                                                              
{                                                                                                       
        u64 val=0;                                                                                      
        asm volatile("mrs %0, pmcr_el0" : "=r" (val));                                                  
        return (u32)val;                                                                                
}                                                                                                       
/*
 * Write val to PMCR_EL0.
 *
 * Bits outside ARMV8_PMCR_MASK are cleared before the write, and an ISB is
 * issued ahead of the MSR instruction.
 */
static inline void armv8pmu_pmcr_write(u32 val)
{
        u64 masked = (u64)(val & ARMV8_PMCR_MASK);

        isb();
        asm volatile("msr pmcr_el0, %0" : : "r" (masked));
}

/*
 * Return the current value of the CNTVCT_EL0 system register.
 *
 * NOTE(review): the function name refers to CNTPCT_EL0, but the instruction
 * below reads CNTVCT_EL0 -- confirm which counter is intended.
 */
static inline long long armv8_read_CNTPCT_EL0(void)
{
   long long count;

   asm volatile("mrs %0, CNTVCT_EL0" : "=r" (count));
   return count;
}

                                                                                                        
/*
 * Per-CPU callback that opens the PMU to user mode and starts the cycle
 * counter. init() runs it on every CPU through on_each_cpu().
 *
 * data: unused; present only to match the on_each_cpu() callback signature.
 *
 * All register operands are widened to 64 bits because MSR transfers a full
 * general-purpose register.
 */
static void
enable_cpu_counters(void *data)
{
	/*
	 * 0xf sets bits [3:0] of PMUSERENR_EL0; bits 0, 2 and 3 are
	 * ARMV8_PMUSERENR_EN_EL0, ARMV8_PMUSERENR_CR and ARMV8_PMUSERENR_ER.
	 */
	asm volatile("msr pmuserenr_el0, %0" : : "r" ((u64)0xf));

	/* Program PMCR_EL0 with only E and LC requested. */
	armv8pmu_pmcr_write(ARMV8_PMCR_LC | ARMV8_PMCR_E);

	/*
	 * Bit 31 of PMCNTENSET_EL0 enables the cycle counter. The constant is
	 * built as an unsigned 64-bit value; (1 << 31) on a signed int
	 * overflows into the sign bit.
	 */
	asm volatile("msr PMCNTENSET_EL0, %0" : : "r" ((u64)1 << 31));

	/* Re-assert E and LC on top of whatever PMCR_EL0 now reads back. */
	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E | ARMV8_PMCR_LC);

	printk("\nCPU:%d ", smp_processor_id());
}
                                                                                                        
/*
 * Per-CPU callback that stops the counters and revokes user-mode PMU access.
 * fini() runs it on every CPU through on_each_cpu().
 *
 * data: unused; present only to match the on_each_cpu() callback signature.
 */
static void
disable_cpu_counters(void *data)
{
	printk(KERN_INFO "\ndisabling user-mode PMU access on CPU #%d",
	       smp_processor_id());

	/*
	 * Clear only the E bit of PMCR_EL0. The mask must be applied with '&':
	 * OR-ing with ~ARMV8_PMCR_E would set every other bit and leave E
	 * exactly as it was, so the counters would never be disabled.
	 */
	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E);

	/* Writing 0 clears every enable bit in PMUSERENR_EL0. */
	asm volatile("msr pmuserenr_el0, %0" : : "r" ((u64)0));
}
                                                                                                        
/*
 * Module entry point.
 *
 * Logs the current PMCCNTR_EL0, PMCNTENSET_EL0 and PMCR_EL0 values of the CPU
 * it runs on, then calls enable_cpu_counters() on every CPU.
 *
 * Register values are read into 64-bit variables (MRS transfers a full
 * general-purpose register) and printed with matching %ll conversions.
 *
 * Returns 0 unconditionally.
 */
static int __init
init(void)
{
	u64 cval;
	u64 val;

	isb();
	asm volatile("mrs %0, PMCCNTR_EL0" : "=r" (cval));
	printk("\nCPU Cycle count:%llu \n", cval);
	asm volatile("mrs %0, PMCNTENSET_EL0" : "=r" (val));
	printk("PMCNTENSET_EL0:%llX ", val);
	asm volatile("mrs %0, PMCR_EL0" : "=r" (val));
	printk("\nPMCR_EL0 Register:%llX ", val);

	/* Last argument 1: wait until the callback has finished on all CPUs. */
	on_each_cpu(enable_cpu_counters, NULL, 1);
	printk(KERN_INFO "Enable Access PMU Initialized");
	return 0;
}
                                                                                                        
/*
 * Module exit point: run disable_cpu_counters() on every CPU, then log that
 * PMU access has been disabled.
 */
static void __exit fini(void)
{
	on_each_cpu(disable_cpu_counters, NULL, 1);

	printk(KERN_INFO "Access PMU Disabled");
}
                                                                                                        
/* Register init() as the module load hook and fini() as the unload hook. */
module_init(init);                                                                                      
module_exit(fini);

2、x86下TSC的值,在CPU上电后就开始累加,且是只读寄存器。但在ARM中,只有使能PMCCNTR_EL0后,TSC才开始累加计数,且PMCCNTR_EL0寄存器可清零,相当于计时器

    用户态读取ARMv8 PMU寄存器代码:

#include 
#include 
#include 

/* PMCR_EL0 bit definitions. */

/* All counters, including PMCCNTR_EL0, are disabled/enabled */
#define QUADD_ARMV8_PMCR_E      (1 << 0)
/* Reset all event counters, not including PMCCNTR_EL0, to 0 */
#define QUADD_ARMV8_PMCR_P      (1 << 1)
/* Reset PMCCNTR_EL0 to 0 */
#define QUADD_ARMV8_PMCR_C      (1 << 2)
/* Clock divider: PMCCNTR_EL0 counts every clock cycle/every 64 clock cycles */
#define QUADD_ARMV8_PMCR_D      (1 << 3)
/* Export of events is disabled/enabled */
#define QUADD_ARMV8_PMCR_X      (1 << 4)
/* Disable cycle counter, PMCCNTR_EL0 when event counting is prohibited */
#define QUADD_ARMV8_PMCR_DP     (1 << 5)
/* Long cycle count enable */
#define QUADD_ARMV8_PMCR_LC     (1 << 6)

/*
 * Mask for writable bits: bits [6:0] (E, P, C, D, X, DP, LC).
 * Bit 6 is included so that QUADD_ARMV8_PMCR_LC survives masking.
 */
#define	ARMV8_PMCR_MASK		0x7f

/*
 * Read the Performance Monitors Control Register (PMCR_EL0).
 *
 * MRS transfers a full 64-bit general-purpose register, so the value is
 * received in a 64-bit temporary and then narrowed; binding a 32-bit variable
 * to the operand mismatches the register width.
 *
 * Returns the low 32 bits of PMCR_EL0.
 */
static inline unsigned int armv8_pmu_pmcr_read(void)
{
        unsigned long long val;

        asm volatile("mrs %0, pmcr_el0" : "=r" (val));
        return (unsigned int)val;
}
static inline void armv8_pmu_pmcr_write(unsigned int val)
{
    asm volatile("msr pmcr_el0, %0" : :"r" (val & ARMV8_PMCR_MASK));
}


/*
 * Return the current value of the CNTVCT_EL0 system register.
 *
 * NOTE(review): the function name refers to CNTPCT_EL0, but the instruction
 * below reads CNTVCT_EL0 -- confirm which counter is intended.
 */
static inline long long armv8_read_CNTPCT_EL0(void)
{
   long long count;

   asm volatile("mrs %0, CNTVCT_EL0" : "=r" (count));
   return count;
}

/*
 * Set the E (enable) and X (export) bits in PMCR_EL0.
 *
 * The register update is compiled in only when PMU_PROGRAM_FROM_USERSPACE is
 * defined at build time. Without it this function does nothing, which matches
 * the previous behaviour: an unconditional `return` at the top of the body
 * left the PMCR_EL0 update unreachable.
 */
static void enable_all_counters(void)
{
#ifdef PMU_PROGRAM_FROM_USERSPACE
    unsigned int val;

    /* Enable all counters */
    val = armv8_pmu_pmcr_read();
    val |= QUADD_ARMV8_PMCR_E | QUADD_ARMV8_PMCR_X;
    armv8_pmu_pmcr_write(val);
#endif
}

/*
 * Set the P (event counter reset) and C (cycle counter reset) bits in
 * PMCR_EL0.
 *
 * The register update is compiled in only when PMU_PROGRAM_FROM_USERSPACE is
 * defined at build time. Without it this function does nothing, which matches
 * the previous behaviour: an unconditional `return` at the top of the body
 * left the PMCR_EL0 update unreachable.
 */
static void reset_all_counters(void)
{
#ifdef PMU_PROGRAM_FROM_USERSPACE
    unsigned int val;

    val = armv8_pmu_pmcr_read();
    val |= QUADD_ARMV8_PMCR_P | QUADD_ARMV8_PMCR_C;
    armv8_pmu_pmcr_write(val);
#endif
}

static unsigned int enabled=0;

unsigned int readticks(unsigned int *result)
{
    struct timeval t;
    unsigned int cc;
    unsigned int val;
    if (!enabled) {
        reset_all_counters();
        enable_all_counters();
        enabled = 1;
    }
    cc = armv8_pmu_pmcr_read();
    gettimeofday(&t,(struct timezone *) 0);
    result[0] = cc;
    result[1] = t.tv_usec;
    result[2] = t.tv_sec;
  
    return cc;
}


/*
 * Read PMCR_EL0 and return its low 32 bits.
 *
 * MRS transfers a full 64-bit general-purpose register, so the value is
 * received in a 64-bit temporary before being narrowed.
 */
static inline unsigned int armv8pmu_pmcr_read(void)
{
	unsigned long long val;

	asm volatile("mrs %0, pmcr_el0" : "=r" (val));
	return (unsigned int)val;
}

/*
 * Kernel-style fixed-width aliases for this user-space program. They are
 * typedefs rather than macros so they obey scope and cannot be affected by
 * textual substitution.
 */
typedef unsigned int u32;
typedef unsigned long long u64;

/* Instruction synchronization barrier; also acts as a compiler memory barrier. */
#define isb()		asm volatile("isb" : : : "memory")

/*
 * Return the current value of PMCCNTR_EL0 (the PMU cycle counter).
 *
 * An ISB is issued before the read. The asm operand is output-only ("=r"):
 * a read-write "+r" constraint would make the asm consume cval before it has
 * ever been assigned.
 *
 * NOTE(review): despite the "cntpct" in the name, the register read here is
 * PMCCNTR_EL0, not CNTPCT_EL0.
 */
static inline u64 arch_counter_get_cntpct(void)
{
	u64 cval;

	isb();
	asm volatile("mrs %0, PMCCNTR_EL0" : "=r"(cval));
	return cval;
}

/*
 * Print PMCR_EL0, two PMCCNTR_EL0 samples taken five seconds apart, and a
 * dump of several PMU registers.
 *
 * Each register is read into a 64-bit variable (MRS transfers a full
 * general-purpose register) and printed with a conversion that matches the
 * argument type; "%lX" with an unsigned int argument is undefined behaviour.
 *
 * Returns 0.
 */
int main(void)
{
  unsigned long long timer;
  unsigned long long reg;
  u32 pmcr_el;

  pmcr_el = armv8pmu_pmcr_read();
  printf("\nPMCR_EL0 Register:%X ", pmcr_el);
  timer = arch_counter_get_cntpct();
  printf("\nCPU Cycle Count:0x%llX ",timer);
  sleep(5);
  timer = arch_counter_get_cntpct();
  printf("\nCPU Cycle Count:0x%llX \n",timer);

  asm volatile("mrs %0, PMOVSCLR_EL0" : "=r"(reg));
  printf(" Register PMOVSCLR_EL0:0x%llX \n", reg);

  asm volatile("mrs %0, pmuserenr_el0" : "=r"(reg));
  printf(" Register pmuserenr_el0:0x%llX \n", reg);

  asm volatile("mrs %0, PMCNTENSET_EL0" : "=r"(reg));
  printf(" Register PMCNTENSET_EL0:0x%llX \n", reg);

  asm volatile("mrs %0, PMCCFILTR_EL0" : "=r"(reg));
  printf(" Register PMCCFILTR_EL0:0x%llX \n", reg);

  asm volatile("mrs %0, PMCNTENCLR_EL0" : "=r"(reg));
  printf(" Register PMCNTENCLR_EL0:0x%llX \n", reg);

  asm volatile("mrs %0, PMOVSSET_EL0" : "=r"(reg));
  printf(" Register PMOVSSET_EL0:0x%llX \n", reg);

  return 0;

}
The following table shows the PMCR_EL0 bit assignments for a System register access.

Table 11-4 PMCR_EL0 bit assignments

Bits Name Function
[31:24] IMP
Implementer code:
0x41 ARM.
This is a read-only field.
[23:16] IDCODE
Identification code:
0x01 Cortex-A57 processor.
This is a read-only field.
[15:11] N
Number of event counters.
In Non-secure modes other than Hyp mode, this field reads the value of HDCR.HPMN. See 4.5.12 Hyp Debug Control Register.
In Secure state and Hyp mode, this field returns 0x6 that indicates the number of counters implemented.
This is a read-only field.
[10:7] Reserved, RES0.
[6] LC
Long cycle count enable. Selects which PMCCNTR_EL0 bit generates an overflow recorded in PMOVSR[31]:
0 Overflow on increment that changes PMCCNTR_EL0[31] from 1 to 0.
1 Overflow on increment that changes PMCCNTR_EL0[63] from 1 to 0.
[5] DP
Disable cycle counter, PMCCNTR_EL0 when event counting is prohibited:
0 Cycle counter operates regardless of the non-invasive debug authentication settings.
1 Cycle counter is disabled if non-invasive debug is not permitted and enabled.
This bit is read/write.
[4] X
Export enable. This bit permits events to be exported to another debug device, such as a trace macrocell, over an event bus:
0 Export of events is disabled.
1 Export of events is enabled.
This bit is read/write and does not affect the generation of Performance Monitors interrupts, that can be implemented as a signal exported from the processor to an interrupt controller.
[3] D
Clock divider:
0 When enabled, PMCCNTR_EL0 counts every clock cycle.
1 When enabled, PMCCNTR_EL0 counts every 64 clock cycles.
This bit is read/write.
[2] C
Clock counter reset:
0 No action.
1 Reset PMCCNTR_EL0 to 0.

Note

Resetting PMCCNTR does not clear the PMCCNTR_EL0 overflow bit to 0. See the ARM® Architecture Reference Manual ARMv8 for more information.

This bit is write-only, and always RAZ.
[1] P
Event counter reset:
0 No action.
1 Reset all event counters, not including PMCCNTR_EL0, to 0.
In Non-secure modes other than Hyp mode, a write of 1 to this bit does not reset event counters that the HDCR.HPMN field reserves for Hyp mode use. See 4.5.12 Hyp Debug Control Register.
In Secure state and Hyp mode, a write of 1 to this bit resets all the event counters.
[0] E
Enable bit. This bit does not disable or enable, counting by event counters reserved for Hyp mode by HDCR.HPMN. It also does not suppress the generation of performance monitor overflow interrupt requests by those counters:
0 All counters, including PMCCNTR_EL0, are disabled. This is the reset value.
1 All counters are enabled.
This bit is read/write.
测试结果:
 Linux linux 3.16.0+ #111 SMP Sat Mar 28 09:09:43 CST 2015 aarch64 aarch64 aarch64 GNU/Linux
linux:/home/hw-1020 # ./tsc_1 
PMCR_EL0 Register:41013001 
CPU Cycle Count:0x1364512659B 
CPU Cycle Count:0x136C23FE71D 
 
 Register PMOVSCLR_EL0:0x80000000 
 Register pmuserenr_el0:0xF 
 Register PMCNTENSET_EL0:0x80000000 
 Register PMCCFILTR_EL0:0x8000000 
 Register PMCNTENCLR_EL0:0x80000000 
 Register PMOVSSET_EL0:0x80000000 
linux:/home/hw-1020 # ./tsc_3
 
PMCR_EL0 Register:41013001 
CPU Cycle Count:0x1399B0D6576 
CPU Cycle Count:0x13B1291DDE0 
 Register PMOVSCLR_EL0:0x80000000 
 Register pmuserenr_el0:0xF 
 Register PMCNTENSET_EL0:0x80000000 
 Register PMCCFILTR_EL0:0x8000000 
 Register PMCNTENCLR_EL0:0x80000000 
 Register PMOVSSET_EL0:0x80000000 
linux:/home/hw-1020 # ./tsc_5
 
PMCR_EL0 Register:41013001 
CPU Cycle Count:0x13C2DF8BFAA 
CPU Cycle Count:0x13E9FD420AB 
 Register PMOVSCLR_EL0:0x80000000 
 Register pmuserenr_el0:0xF 
 Register PMCNTENSET_EL0:0x80000000 
 Register PMCCFILTR_EL0:0x8000000 
 Register PMCNTENCLR_EL0:0x80000000 
 Register PMOVSSET_EL0:0x80000000 
linux:/home/hw-1020 # 

示例代码下载:read aarch64 TSC

Share this:

返回