2
votes

I have to put my device into a very deep low power mode from Linux 2.6.38 and therefore, it's necessary to suspend all components, including the CPU and DDR2.

What I found out so far is that I have to copy the core assembler function into the processor's internal memory and execute it from there. Basically, it looks like this:

/* Reserve 1 KiB of on-chip RAM; iram_addr is filled in by iram_alloc()
 * (presumably with the physical address, since it is handed to
 * __arm_ioremap() below - TODO confirm). */
cpaddr = iram_alloc(SZ_1K, &iram_addr);
if(!cpaddr) return -ENOMEM;
/* Create a kernel mapping of that region with memory type MT_HIGH_VECTORS. */
suspend_iram_base = __arm_ioremap(iram_addr, SZ_1K, MT_HIGH_VECTORS);
/* Copy SZ_1K bytes starting at cpu_v6_sdram_off into the mapping. */
memcpy(suspend_iram_base, cpu_v6_sdram_off, SZ_1K);
/* Cache maintenance on the copied range, then on the whole cache, before
 * the copy is executed. */
flush_icache_range(suspend_iram_base, suspend_iram_base + SZ_1K);
flush_cache_all();

/*
 * Load esdctl_addr into r0, csd0_addr into r1 and the mapped IRAM address
 * into r2, then call the copied routine through r2.
 * NOTE(review): the statement declares only input operands; r0-r2 and lr
 * (written by blx) are overwritten without a clobber list - confirm the
 * surrounding code tolerates that.
 * NOTE(review): the branch target is the *virtual* mapping, so the routine
 * runs with the MMU still enabled.
 */
__asm__ __volatile__(
  "ldr r0, %0\n"
  "ldr r1, %1\n"
  "ldr r2, %2\n"
  "blx r2\n"
  "nop\n"
  : : "m" (esdctl_addr),
      "m" (csd0_addr),
      "m" (suspend_iram_base));

So far everything works as expected, I can verify code execution from internal memory (in virtual address space) with the JTAG debugger.

If I understand it all correctly, I have to do the following in the IRAM function:

  • disable interrupts and caches
  • set the SDRAM controller into precharge power down mode
  • execute a precharge all command and access memory with A10 high (e.g. 0x400) to effectively close all banks
  • put the CPU into standby by executing a WFI instruction
  • re-enable everything afterwards (left out in the source code below)

The corresponding code looks like this:

@ cpu_v6_sdram_off: mask interrupts, turn the caches off, write a new
@ configuration to the SDRAM controller, issue a dummy SDRAM write and
@ wait for an interrupt.
@ In:      r0, r1 (see below); lr = return address
@ Clobbers: r1, r2
@ NOTE(review): only bits 12 and 2 of the control register are cleared;
@ bit 0 is left untouched, so address translation stays as it was on entry.
ENTRY(cpu_v6_sdram_off)
  @ r0: esdctl base address
  @ r1: csd0 address with a10 high

  @ mask IRQ and FIQ
  cpsid   if

  @ disable I and D cache
  @ (read-modify-write of CP15 c1, c0, 0)
  mrc     p15, 0, r2, c1, c0, 0
  bic     r2, r2, #0x00001000  @ disable I cache
  bic     r2, r2, #0x00000004  @ disable D cache
  mcr     p15, 0, r2, c1, c0, 0

  @ invalidate I cache
  mov     r2, #0
  mcr     p15, 0, r2, c7, c5, 0

  @ clear and invalidate D cache
  mov     r2, #0
  mcr     p15, 0, r2, c7, c14, 0

  @ precharge power down mode
  @ (read-modify-write of the word at [r0]: bits 11:10 are set to 0b01)
  ldr     r2, [r0]
  bic     r2, r2, #0xc00
  orr     r2, r2, #0x400
  str     r2, [r0]

  @ precharge all command
  @ r2 = 0x92228400, built in pieces, then stored to [r0]
  mov     r2, #0x92
  lsl     r2, #24
  orr     r2, r2, #0x228000
  orr     r2, r2, #0x0400
  str     r2, [r0]
  @ r2 = 0x12345678, the value used for the dummy write
  mov     r2, #0x12
  lsl     r2, #24
  orr     r2, r2, #0x340000
  orr     r2, r2, #0x5600
  orr     r2, r2, #0x78
  str     r2, [r1] @ dummy write access

  @ execute wait for interrupt
  @ (r1 is reused as a zero operand; c7, c10, 4 drains the write buffer,
  @ c7, c0, 4 is the wait-for-interrupt operation)
  mov     r1, #0
  mcr     p15, 0, r1, c7, c10, 4
  mcr     p15, 0, r1, c7, c0, 4

  @ unmask IRQ and FIQ and return to the caller
  cpsie   if
  bx      lr
ENDPROC(cpu_v6_sdram_off)

The problem is at the point where the RAM is accessed with a dummy write. It simply results in a data abort exception and then the CPU gets lost. If I leave this part out, the DDR2 doesn't seem to be put into low power mode, because the current consumption doesn't go down.

Now I'm totally stuck and out of ideas here. Could someone please give me a hint what I'm doing wrong or what I'm missing here? Or is there any documentation or source code available demonstrating the whole procedure for the i.MX35 on Linux?

2

2 Answers

1
votes

As well as disabling the icache and dcache, it is necessary to drain any buffers. I have only implemented this on an i.MX25, which is an ARM926 (ARMv5). I am now developing for an ARMv7, where it seems a dcache flush may be appropriate, i.e., ensure that the CPU dumps everything to SDRAM.

Now, it also seems you missed a key step of turning off the MMU. When you run str r2, [r1] @ dummy write access, you will get a TLB miss and try to access the page tables, which are probably in SDRAM. I see a problem ;-). Luckily you have assembler which is PC relative and will run anywhere, anytime.

Here is a sample function to disable the MMU before calling the routine physically. It is for the ARMV5, you need to update the p15 values to the functional equivalents for your CPU.

/*
 * phys_execute - call a routine with the caches and MMU switched off.
 *
 * Sequence: save r4-r12/lr, clean+invalidate the D-cache, invalidate the
 * I-cache, drain the write buffer, clear the M, C and I bits of the CP15
 * control register, call the routine, restore the saved control register
 * value, repeat the cache maintenance and return.
 *
 * function_pointer: routine to call. It is not passed as an asm operand;
 *     the code relies on it still being in r0 (first-argument register)
 *     when the asm starts.
 *
 * The cache operations are the ARMv5 (ARM926) encodings and must be
 * replaced with the equivalents for other cores.
 *
 * NOTE(review): the asm returns from the function itself via
 * "pop {r4-r12,pc}", bypassing any compiler-generated epilogue - confirm
 * the compiler emits no prologue for this function.
 */
static void phys_execute(void /*@unused@*/ (*function_pointer)(void))
{
    __asm volatile (
        "   push    {r4-r12,lr}                 \n" /* save everything */
        "1: mrc     p15, 0, r15, c7, c14, 3     \n" /* armv5 specific.. */
        "   bne     1b                          \n" /* dcache clean */
        "   mov     r8, #0                      \n"
        "   mcr     p15, 0, r8, c7, c5, 0       \n" /* invalidate icache */
        "   mcr     p15, 0, r8, c7, c10, 4      \n" /* drain wb armv5 */
        "   mrc     p15, 0, r10, c1, c0, 0      \n" /* caches/mmu off */
        "   bic     r8, r10, #0x5               \n" /* clear M (bit 0) and C (bit 2) */
        "   bic     r8, r8, #0x1000             \n" /* clear I (bit 12) */
        "   mcr     p15, 0, r8, c1, c0, 0       \n"
        "   blx     r0                          \n" /* Call r0 */
        "   mcr     p15, 0, r10, c1, c0, 0      \n" /* caches on.. (r10 = saved control reg) */
        "1: mrc     p15, 0, r15, c7, c14, 3     \n" /* armv5 again */
        "   mov     r8, #0                      \n" /* mov leaves the flags for bne intact */
        "   bne     1b                          \n"
        "   mcr     p15, 0, r8, c7, c5, 0       \n" /* invalidate icache */
        "   mcr     p15, 0, r8, c7, c10, 4      \n" /* drain write buffer */
        "   pop     {r4-r12,pc}                 \n" /* restore and return to caller */
        );
}

r1 and r2 will make it to the routine called via physical ram. You can re-jig this to hard code three parameters and then the function pointer to put it in r4. However, your

 @ r0: esdctl base address
 @ r1: csd0 address with a10 high

must change to be physical addresses so that when cpu_v6_sdram_off runs, it will be accessing the non-virtual addresses.

1
votes

Thanks for your help!

Well, not so pretty simple ;-) But putting all together what I've understood from your answer, I ended up with the following - certainly not very clean, but at least working - code:

/*
 * Trampoline around the IRAM suspend routine:
 *   1. mask IRQ/FIQ, clean+invalidate D-cache, invalidate I-cache, DSB
 *   2. clear the I, C and M bits of the CP15 control register
 *   3. compute the physical address of the first "nop" (virtual base
 *      0xc0000000 -> physical base 0x80000000) into r1 and call the
 *      routine whose address is in asm_func; the routine returns with
 *      "bx r1"
 *   4. compute the virtual address of the second "nop" into r1, set the
 *      I, C and M bits again and jump there
 *
 * asm_func: physical address of the routine copied to IRAM.
 *
 * The operand is moved into r0 directly and r0-r2 are declared as
 * clobbered, so the compiler can no longer place asm_func in a register
 * that the sequence overwrites before reading it.
 *
 * The "add r1, pc, #N" offsets are hand-counted instruction distances to
 * the two nops; adjust them if instructions are inserted in between.
 *
 * NOTE(review): "pop {r4-r12, pc}" returns from the enclosing function
 * directly, so no compiler-generated code after this statement runs.
 */
__asm__ __volatile__(
  "push    {r4-r12, lr}\n"
  "cpsid   if\n"
  "mov     r0, %0\n"                    // r0 = address of the IRAM routine
  "mov     r2, #0\n"
  "mcr     p15, 0, r2, c7, c14, 0\n" // clear and invalidate D cache
  "mov     r2, #0\n"
  "mcr     p15, 0, r2, c7, c5, 0\n" // invalidate I cache
  "mov     r2, #0\n"
  "mcr     p15, 0, r2, c7, c10, 4\n" // data synchronisation barrier (drain write buffer)
  "mrc     p15, 0, r2, c1, c0, 0\n"
  "bic     r2, r2, #0x00001000\n" // disable I cache
  "bic     r2, r2, #0x00000004\n" // disable D cache
  "bic     r2, r2, #0x00000001\n" // disable MMU
  "mcr     p15, 0, r2, c1, c0, 0\n"
  "add     r1, pc, #8\n"             // r1 = virtual address of the first nop
  "sub     r1, #0xc0000000\n"        // ... converted to its physical address
  "add     r1, #0x80000000\n"
  "blx     r0\n"
  "nop     \n"                       // physical return target of the routine
  "add     r1, pc, #28\n"            // r1 = physical address of the second nop
  "sub     r1, #0x80000000\n"        // ... converted back to its virtual address
  "add     r1, #0xc0000000\n"
  "mrc     p15, 0, r2, c1, c0, 0\n"
  "orr     r2, r2, #0x00001000\n" // enable I cache
  "orr     r2, r2, #0x00000004\n" // enable D cache
  "orr     r2, r2, #0x00000001\n" // enable MMU
  "mcr     p15, 0, r2, c1, c0, 0\n"
  "bx      r1\n"
  "nop     \n"                       // virtual jump target after re-enabling the MMU
  "cpsie   if\n"
  "pop     {r4-r12, pc}\n"
  : : "r" (asm_func)
  : "r0", "r1", "r2", "cc", "memory");

According to the ARM1136 Technical Reference Manual, "Data Synchronisation Barrier" should be used instead of "Drain Write Buffer" on ARMv6, so I took this one.

The two nop commands mark the jump destinations when changing address space. Register r0 contains the physical code location of cpu_v6_sdram_off in IRAM.

The entire suspend/resume code now looks like this:

@ cpu_v6_sdram_off: program the SDRAM controller for power-down, wait for
@ an interrupt, then reprogram the controller and return.
@
@ In:       r1 = address to return to (reached with "bx r1", not lr)
@ Clobbers: r0, r2
@
@ All register/memory addresses are loaded from MX35_*_BASE_ADDR constants
@ through the literal pool, which is emitted directly after the final
@ branch (.ltorg) so that it stays adjacent to the code when the routine
@ is copied elsewhere.
ENTRY(cpu_v6_sdram_off)
  @ r1: physical return address

  @ precharge power down mode
  @ (read-modify-write: bits 11:10 of the register are set to 0b01)
  ldr     r0, =MX35_ESDCTL_BASE_ADDR
  ldr     r2, [r0]
  bic     r2, r2, #0xc00
  orr     r2, r2, #0x400
  str     r2, [r0]

  @ precharge all command
  @ r2 = 0x92228400
  mov     r2, #0x92
  lsl     r2, #24
  orr     r2, r2, #0x228000
  orr     r2, r2, #0x0400
  str     r2, [r0]

  @ dummy write of 0x12345678 to CSD0 base + 0x400 (A10 high)
  ldr     r0, =MX35_CSD0_BASE_ADDR
  add     r0, #0x400
  mov     r2, #0x12
  lsl     r2, #24
  orr     r2, r2, #0x340000
  orr     r2, r2, #0x5600
  orr     r2, r2, #0x78
  str     r2, [r0]

  @ execute wait for interrupt
  @ (c7, c10, 4: data synchronisation barrier; c7, c0, 4: wait for interrupt)
  nop
  mov     r2, #0
  mcr     p15, 0, r2, c7, c10, 4
  mcr     p15, 0, r2, c7, c0, 4

  @ execution continues here after wake-up
  nop
  nop
  nop
  nop
  nop

  @ precharge all command
  @ r2 = 0x92228000
  ldr     r0, =MX35_ESDCTL_BASE_ADDR
  mov     r2, #0x92
  lsl     r2, #24
  orr     r2, r2, #0x228000
  str     r2, [r0]

  @ set manual refresh mode
  @ r2 = 0xa2220000
  mov     r2, #0xa2
  lsl     r2, #24
  add     r2, r2, #0x220000
  str     r2, [r0]

  @ access memory two times
  @ (two writes of 0x12345678 to the CSD0 base address)
  ldr     r0, =MX35_CSD0_BASE_ADDR
  mov     r2, #0x12
  lsl     r2, #24
  orr     r2, r2, #0x340000
  orr     r2, r2, #0x5600
  orr     r2, r2, #0x78
  str     r2, [r0]
  nop
  str     r2, [r0]

  @ enable auto-refresh
  @ r2 = 0x82228080
  ldr     r0, =MX35_ESDCTL_BASE_ADDR
  mov     r2, #0x82
  lsl     r2, #24
  add     r2, #0x228000
  add     r2, #0x80
  str     r2, [r0]

  bx      r1

  @ literal pool for the "ldr rX, =CONST" loads above
  .ltorg
ENDPROC(cpu_v6_sdram_off)

If someone maybe likes to correct or optimize this code, please keep me informed. Thanks!