4
votes

I'm currently trying to use the event counters on an ARM Cortex-a9 (on a Xilinx zynq EPP) to count cycles. I've adapted some ARM example code from ARM for this purpose. I'm programming this bare-metal with the GNU ARM EABI compiler.

The way I understand the use of the PMU is that you first have to enable the PMU.

void enable_pmu (void){
    asm volatile(   "MRC     p15, 0, r0, c9, c12, 0\n\t"
                    "ORR     r0, r0, #0x01\n\t"
                    "MCR     p15, 0, r0, c9, c12, 0\n\t"
    );
}

then you configure the performance counter to count a certain type of event (0x11 for cycles in this case)

void config_pmn(unsigned counter,int event){
    asm volatile(   "AND     %[counter], %[counter], #0x1F\n\t" :: [counter] "r" (counter));    //Mask to leave only bits 4:0
    asm volatile(   "MCR     p15, 0, %[counter], c9, c12, 5\n\t" :: [counter] "r" (counter));   //Write PMSELR Register
    asm volatile(   "ISB\n\t");                                                                 //Synchronize context
    asm volatile(   "MCR     p15, 0, %[event], c9, c13, 1\n\t" :: [event] "r" (counter));       //Write PMXEVTYPER Register
}

Then you enable the event counter

void enable_pmn(int counter){
    asm volatile(   "MOV     r1, #0x1\n\t");
    asm volatile(   "MOV     r1, r1, LSL %[counter]\n\t" :: [counter] "r" (counter));
    asm volatile(   "MCR     p15, 0, r1, c9, c12, 1\n\t");      //Write PMCNTENSET Register
}

after this you immediately reset the event counter

void reset_pmn(void){
    asm volatile(   "MRC     p15, 0, r0, c9, c12, 0\n\t");  //Read PMCR
    asm volatile(   "ORR     r0, r0, #0x2\n\t");            //Set P bit (Event counter reset)
    asm volatile(   "MCR     p15, 0, r0, c9, c12, 0\n\t");  //Write PMCR
}

you let your application run and read the event counter

int read_pmn(int counter){
    int value;
    asm volatile(   "AND     %0,%0, #0x1F\n\t" :: "r" (counter));          //Mask to leave only bits 4:0
    asm volatile(   "MCR     p15, 0, %[counter], c9, c12, 5\n\t" ::[counter] "r" (counter));        //Write PMSELR Register
    asm volatile(   "ISB\n\t");                                                                     //Synchronize context
    asm volatile(   "MRC     p15, 0,%[value] , c9, c13, 2\n\t" : [value] "=r" (value));                 //Read current PMNx Register
    return value;
}

and then you disable the event counter

void disable_pmn(int counter){
     asm volatile(  "MOV     r1, #0x1\n\t");
     asm volatile(  "MOV     r1, r1, LSL %[counter] \n\t":: [counter] "r" (counter));
     asm volatile(  "MCR     p15, 0, r1, c9, c12, 2\n\t");  //Write PMCNTENCLR Register
}

and the pmu.

void disable_pmu (void){
    asm volatile(   "MRC     p15, 0, r0, c9, c12, 0\n\t"
                    "BIC     r0, r0, #0x01\n\t"
                    "MCR     p15, 0, r0, c9, c12, 0\n\t"
    );
}

However when I try to read the value stored in the event counter I get 0. I know my PMU is configured correctly because I'm able to read the cycle counter (PMCCNTR) without a problem. Probably there is a problem with the way I configure the counter or the way I read it. This inline assembly stuff is pretty new to me so if somebody can point me in the right direction I would be forever grateful.

1
See Cortex-A8 pmnc and Cortex-a8 profiling, they might be helpful. And the Linux perf_event_v7.c which is suppose to be for Cortex CPUs afaik. It could be something simple; I always have trouble with the MCR/MRC parameters. - artless noise
You also have some issues with the in-line assembler. In a lot of cases, you modify counter, but don't annotate this. Also, you are using a hard-coded r0,r1, but didn't specify this. You can group multiple asm op-codes in the same asm statement. Just use a \n; you don't need to specify the parameters so many times then, but get them right. Also A8 A8-2 - artless noise

1 Answers

3
votes

Section C.12.8.5 of the ARM Architecture Reference outlines "Required events", and I've found that the Zynq supports only a bare minimum of PMU events. Trying to use unsupported events just gives zero counts, as you describe.

Attached below is a small example of how to manipulate the registers of coprocessor 15 to set up the counters and read their values:

// My system has 6 configurable counters and a separate Cycle Count register.
// This will contain a nice human-readable name for the configured counters.
const char* cpu_name[7] = { "", "", "", "", "", "", "CCNT" };

typedef struct {
  u32 reg[7];       // 6 configurables and the cycle count
} cpu_perf;


inline u32 _read_cpu_counter(int r) {
  // Read PMXEVCNTR #r
  // This is done by first writing the counter number to PMSELR and then reading PMXEVCNTR
  u32 ret;
  asm volatile ("MCR p15, 0, %0, c9, c12, 5\t\n" :: "r"(r));      // Select event counter in PMSELR
  asm volatile ("MRC p15, 0, %0, c9, c13, 2\t\n" : "=r"(ret));    // Read from PMXEVCNTR
  return ret;
}

inline void _setup_cpu_counter(u32 r, u32 event, const char* name) {
  cpu_name[r] = name;

  // Write PMXEVTYPER #r
  // This is done by first writing the counter number to PMSELR and then writing PMXEVTYPER
  asm volatile ("MCR p15, 0, %0, c9, c12, 5\t\n" :: "r"(r));        // Select event counter in PMSELR
  asm volatile ("MCR p15, 0, %0, c9, c13, 1\t\n" :: "r"(event));    // Set the event number in PMXEVTYPER
}

void init_cpu_perf() {

  // Disable all counters for configuration (PCMCNTENCLR)
  asm volatile ("MCR p15, 0, %0, c9, c12, 2\t\n" :: "r"(0x8000003f));

  // disable counter overflow interrupts
  asm volatile ("MCR p15, 0, %0, c9, c14, 2\n\t" :: "r"(0x8000003f));


  // Select which events to count in the 6 configurable counters
  // Note that both of these examples come from the list of required events.
  _setup_cpu_counter(0, 0x04, "L1DACC");
  _setup_cpu_counter(1, 0x03, "L1DFILL");

}


inline void reset_cpu_perf() {

  // Disable all counters (PMCNTENCLR):
  asm volatile ("MCR p15, 0, %0, c9, c12, 2\t\n" :: "r"(0x8000003f));

  u32 pmcr  = 0x1    // enable counters
            | 0x2    // reset all other counters
            | 0x4    // reset cycle counter
            | 0x8    // enable "by 64" divider for CCNT.
            | 0x10;  // Export events to external monitoring

  // program the performance-counter control-register (PMCR):
  asm volatile ("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(pmcr));

  // clear overflows (PMOVSR):
  asm volatile ("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000003f));

  // Enable all counters (PMCNTENSET):
  asm volatile ("MCR p15, 0, %0, c9, c12, 1\t\n" :: "r"(0x8000003f));

}

inline cpu_perf get_cpu_perf() {
  cpu_perf ret;
  int r;

  // Read the configurable counters
  for (r=0; r<6; ++r) {
    ret.reg[r] = _read_cpu_counter(r);
  }

  // Read CPU cycle count from the CCNT Register
  asm volatile ("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(ret.reg[6]));

  return ret;
}

int main() {
  init_cpu_perf();

  // Here's what a test looks like:
  reset_cpu_perf();
  /*
   * ... Perform your operations
   */
  cpu_perf results_1 = get_cpu_perf();

}