3
votes

I have a simple program that initializes a C-style string and then initializes a character. I then use strcpy to cause a buffer overflow, which would seemingly overwrite the contents of the character variable x (assuming it is stored in adjacent memory).

// str is 6 bytes long: the 5 characters of "Testt" plus the terminating '\0'
char str[] = "Testt";
char x = 'X';

// print address and value of str
printf("%p: ", &str);
printf("%s\n", str);

// print value of x
printf("%c\n", x);

// cause buffer overflow: "Hello world" needs 12 bytes (11 characters + '\0'),
// so strcpy writes 6 bytes past the end of the 6-byte array str
strcpy(str, "Hello world");

// print address and value of str
printf("%p: ", &str);
printf("%s\n", str);

// print value of x (the address print below is disabled in this variant,
// so the address of x is never taken anywhere in the program)
// printf("%p: ", &x);
printf("%c\n", x);
return 0;

When run, this code produces output that looks like

0061FF29: Testt
X
0061FF29: Hello world
w

This situation shows that the buffer overflow did occur, and it caused the value of the x variable to change from 'X' to 'w'.

However, if I uncomment the // printf("%p: ", &x); on the third-to-last line, the buffer overflow does not cause the x variable to be overwritten.

For clarity here is that code (notice the change on the third to last line)

// str is 6 bytes long: the 5 characters of "Testt" plus the terminating '\0'
char str[] = "Testt";
char x = 'X';

// print address and value of str
printf("%p: ", &str);
printf("%s\n", str);

// print value of x
printf("%c\n", x);

// cause buffer overflow: "Hello world" needs 12 bytes (11 characters + '\0'),
// so strcpy writes 6 bytes past the end of the 6-byte array str
strcpy(str, "Hello world");

// print address and value of str
printf("%p: ", &str);
printf("%s\n", str);

// print address and value of x (this variant takes the address of x,
// which is the only difference from the first version)
printf("%p: ", &x);
printf("%c\n", x);
return 0;

This causes the output to be:

0061FF2A: Testt
X
0061FF2A: Hello world
0061FF29: X

So in this situation, the buffer overflow did not overwrite the x variable.

Why does simply printing the memory address of the x variable have this effect on the buffer overflow situation?

Edit: added the assembly for the two situations. The generated assembly for the first case (without the extra printf):

    # GCC 6.3.0 (MinGW) output, 32-bit x86, AT&T syntax (source, destination).
    # Variant in which the address of x is never taken.
    # Stack layout chosen by the compiler in this variant:
    #   25(%esp)..30(%esp)  str  (6 bytes)
    #   31(%esp)            x    (1 byte, directly after the end of str)
    .file   "hello.c"
    .def    ___main;    .scl    2;  .type   32; .endef
    .section .rdata,"dr"
LC0:
    # format string "%p: "
    .ascii "%p: \0"
LC1:
    # format string "%c\n" (\12 is octal for newline)
    .ascii "%c\12\0"
    .text
    .globl  _main
    .def    _main;  .scl    2;  .type   32; .endef
_main:
LFB17:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    # round %esp down to a 16-byte boundary, then reserve 32 bytes for
    # locals and outgoing call arguments
    andl    $-16, %esp
    subl    $32, %esp
    call    ___main
    # str = "Testt": 1953719636 == 0x74736554, i.e. bytes 'T','e','s','t'
    # in little-endian order, stored at 25..28(%esp)
    movl    $1953719636, 25(%esp)
    # 116 == 't' followed by a zero byte: str[4] and the terminator at 29..30(%esp)
    movw    $116, 29(%esp)
    # x = 'X' (ASCII 88) at 31(%esp)
    movb    $88, 31(%esp)
    # printf("%p: ", &str): arguments are stored at (%esp) and 4(%esp)
    leal    25(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $LC0, (%esp)
    call    _printf
    # the C source's printf("%s\n", str) is emitted as puts(str)
    leal    25(%esp), %eax
    movl    %eax, (%esp)
    call    _puts
    # printf("%c\n", x): x is loaded from 31(%esp) and sign-extended to 32 bits
    movsbl  31(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $LC1, (%esp)
    call    _printf
    # strcpy(str, "Hello world") is inlined as three 4-byte stores starting
    # at 25(%esp):
    #   1819043144 == 0x6C6C6548 -> "Hell"   at 25..28(%esp)
    #   1870078063 == 0x6F77206F -> "o wo"   at 29..32(%esp)
    #   6581362    == 0x00646C72 -> "rld\0"  at 33..36(%esp)
    # The 'w' of "world" lands on 31(%esp), which is x, and the stores run
    # through 36(%esp), past the 32 bytes reserved by the subl above.
    leal    25(%esp), %eax
    movl    $1819043144, (%eax)
    movl    $1870078063, 4(%eax)
    movl    $6581362, 8(%eax)
    # printf("%p: ", &str)
    leal    25(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $LC0, (%esp)
    call    _printf
    # puts(str)
    leal    25(%esp), %eax
    movl    %eax, (%esp)
    call    _puts
    # printf("%c\n", x): reads 31(%esp), which now holds 'w'
    movsbl  31(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $LC1, (%esp)
    call    _printf
    # return 0
    movl    $0, %eax
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
LFE17:
    .ident  "GCC: (MinGW.org GCC-6.3.0-1) 6.3.0"
    .def    _printf;    .scl    2;  .type   32; .endef
    .def    _puts;  .scl    2;  .type   32; .endef

and for the second situation

    # GCC 6.3.0 (MinGW) output, 32-bit x86, AT&T syntax (source, destination).
    # Variant in which the address of x is taken and printed.
    # Stack layout chosen by the compiler in this variant:
    #   25(%esp)            x    (1 byte, directly below the start of str)
    #   26(%esp)..31(%esp)  str  (6 bytes)
    .file   "hello.c"
    .def    ___main;    .scl    2;  .type   32; .endef
    .section .rdata,"dr"
LC0:
    # format string "%p: "
    .ascii "%p: \0"
LC1:
    # format string "%c\n" (\12 is octal for newline)
    .ascii "%c\12\0"
    .text
    .globl  _main
    .def    _main;  .scl    2;  .type   32; .endef
_main:
LFB17:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    # round %esp down to a 16-byte boundary, then reserve 32 bytes for
    # locals and outgoing call arguments
    andl    $-16, %esp
    subl    $32, %esp
    call    ___main
    # str = "Testt": 1953719636 == 0x74736554, i.e. bytes 'T','e','s','t'
    # in little-endian order, stored at 26..29(%esp)
    movl    $1953719636, 26(%esp)
    # 116 == 't' followed by a zero byte: str[4] and the terminator at 30..31(%esp)
    movw    $116, 30(%esp)
    # x = 'X' (ASCII 88) at 25(%esp), one byte below str
    movb    $88, 25(%esp)
    # printf("%p: ", &str): arguments are stored at (%esp) and 4(%esp)
    leal    26(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $LC0, (%esp)
    call    _printf
    # the C source's printf("%s\n", str) is emitted as puts(str)
    leal    26(%esp), %eax
    movl    %eax, (%esp)
    call    _puts
    # printf("%c\n", x): x is loaded from 25(%esp), then sign-extended to 32 bits
    movzbl  25(%esp), %eax
    movsbl  %al, %eax
    movl    %eax, 4(%esp)
    movl    $LC1, (%esp)
    call    _printf
    # strcpy(str, "Hello world") is inlined as three 4-byte stores starting
    # at 26(%esp):
    #   1819043144 == 0x6C6C6548 -> "Hell"   at 26..29(%esp)
    #   1870078063 == 0x6F77206F -> "o wo"   at 30..33(%esp)
    #   6581362    == 0x00646C72 -> "rld\0"  at 34..37(%esp)
    # All stores go to addresses at or above 26(%esp), so x at 25(%esp) is
    # not touched; the stores still run past the 32 bytes reserved above.
    leal    26(%esp), %eax
    movl    $1819043144, (%eax)
    movl    $1870078063, 4(%eax)
    movl    $6581362, 8(%eax)
    # printf("%p: ", &str)
    leal    26(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $LC0, (%esp)
    call    _printf
    # puts(str)
    leal    26(%esp), %eax
    movl    %eax, (%esp)
    call    _puts
    # printf("%p: ", &x): the address of x, 25(%esp), is passed to printf
    leal    25(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $LC0, (%esp)
    call    _printf
    # printf("%c\n", x): reads 25(%esp), which still holds 'X'
    movzbl  25(%esp), %eax
    movsbl  %al, %eax
    movl    %eax, 4(%esp)
    movl    $LC1, (%esp)
    call    _printf
    # return 0
    movl    $0, %eax
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
LFE17:
    .ident  "GCC: (MinGW.org GCC-6.3.0-1) 6.3.0"
    .def    _printf;    .scl    2;  .type   32; .endef
    .def    _puts;  .scl    2;  .type   32; .endef
1
The C people do not like explaining behavior which is undefined. – Eugene Sh.
Check the generated assembly. – tkausl
It is undefined behavior in either case. – Jason
Undefined behavior is undefined. Don't try to make too much sense of it. – Christian Gibbons
If you look at the assembly (or even your output), you'll see that the order of the variables x and str differs in the two programs. The compiler can place local variables in any arbitrary order, and the algorithm it uses can be totally opaque. But my guess is that the difference here has more to do with the fact that you take the address of x in the second one, thereby forcing it to have an address. If you compiled with optimisation, you'd probably find that x in the first program doesn't actually exist in memory; it can simply live in a register (or be eliminated altogether). – rici

1 Answers

2
votes

First, let's look at why the buffer overflow did not overwrite x in the second example (the overflow still happens; it just does not reach x).

Looking at your output:

0061FF2A: Testt
X
0061FF2A: Hello world
0061FF29: X

We can see that str is above x on the stack.

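The string "Hello world" (11 characters plus the null terminator, 12 bytes in total) is taking up memory addresses 0061FF2A through 0061FF35, so 0061FF36 is the first address after it.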

The stack looks something like

0061FF29   0061FF2A        0061FF36
       |   |                      |
       ----------------------------
       | X | H e l l o  w o r l d |
       ----------------------------

In this case it doesn't matter how far past the end of str we write because x comes before str on the stack.


Next, let's look at why the buffer overflow did overwrite x in the first example.

We can't see the addresses of each variable directly in your output; however, we can see their locations on the stack in the assembly.

# str[0..3] = "Test": 1953719636 == 0x74736554, stored little-endian
movl    $1953719636, 25(%esp)
# str[4] = 't' (116) and str[5] = '\0'
movw    $116, 29(%esp)
# x = 'X' (88)
movb    $88, 31(%esp)

The x variable is definitely at 31(%esp) as we see the decimal ASCII value for 'X' being placed there.

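It is not too big of a leap to assume that the 5-character string "Testt" is being stored at 25(%esp), as the distance between 25(%esp) and 31(%esp) is just enough to store 5 characters and a null terminator. The constants confirm it: 1953719636 is 0x74736554, which is the bytes 'T', 'e', 's', 't' in little-endian order, and the following movw stores 116 ('t') together with the zero terminator.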

So we know str is at 25(%esp) and x is at 31(%esp). The stack should look something like:

esp  +25         +31
  |    |           | 
  ---------------------- 
  |    | T e s t t | X |
  ----------------------

Now we can easily see that str comes before x and it is clear to see why writing past the end of str would cause x to be overwritten.


Now the main question: why did this work in the first case but not the second?

For some reason the compiler decided to place x after str in the first example and x before str in the second example.

As was pointed out in the comments, the exact location of local variables on the stack is not defined by C. The compiler can decide the order it wants things stored in and may change that order from program to program for non-obvious reasons.

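Essentially, the exact location and ordering of local variables on the stack is unspecified and left to the compiler, and writing past the end of str is undefined behavior; which neighbouring bytes the overflow happens to hit therefore depends on the layout the compiler chose, and that is why it overwrites x in one case but not the other.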