Mark Floryan (mrf8t@virginia.edu)
Aaron Bloomfield (aaron@virginia.edu)
@github | ↑ |
Data Representation | Program Representation | |||||
string int x[3] char x 0x9cd0f0ad 01101011 |
![]() |
Objects Arrays Primitive types Addresses bits |
Java code C++ code C code x86 code IBCM hexadecimal |
![]() |
High-level language Low-level language Assembly language Machine code |
Introduction to x86
x86 Instruction Set
Calling Conventions
Callee Rules
Caller Rules
Activation Records
x86 Examples
|
|
while(power is on) {
IR := mem[PC]
PC := PC + 1 (word) // 32-bits in x86
execute instruction in IR
}
Directives
|
|
mov <dest>, <src>
|
Incorrect: (why?)
|
|
Memory:
|
mov eax, [4*esi-edx]
mov eax, [4*esi+4]
mov eax, [4*esi+edx+8]
mov eax, [esi+4*edx]
mov eax+4, [esi]
mov [eax], [var]
mov [eax+4], [ebx]
mov 20, [eax]
mov
push
push eax
push [var]
pop
pop eax
pop [var]
lea
lea eax, [var]
lea edi, [ebx+4*esi]
add
, sub
add <reg>, <reg>
add <reg>, <mem>
add <mem>, <reg>
add <reg>, <constant>
add <mem>, <constant>
inc
, dec
(increment and decrement by one)
inc <reg>
inc <mem>
dec eax
inc [var]
imul
imul <reg32>, <reg32> (or <mem>)
imul <reg32>, <reg32> (or <mem>), <con>
idiv
idiv ebx
and
, or
, xor
and <reg>, <reg>
and <reg>, <mem>
and <mem>, <reg>
and eax, 0fH
xor ecx, ecx
jmp <label>
cmp
cmp
je <label>
jne
, jz
, jg
, jge
, jl
, jle
, js
, etc.
call <label>
ret
C/C++ code:
|
Assembly code:
|
int max(int x, int y) {
int theMax = (x > y) ? x : y;
return theMax;
}
int main() {
int a = 5, b = 6;
int maxVal = max(a,b);
cout << "Max value: " << maxVal << endl;
return 0;
}
pop
push
call
ret
void foo() {
// some function code ...
}
int main() {
foo();
return 0;
}
main()
is the caller
foo()
is the callee
Foo::bar(int)
and Foo::bar(int, float)
Foo::bar (int x = 3)
This code adapted from here
#include <cstdarg>
#include <iostream>
using namespace std;
double average (int num, ...) {
va_list arguments;
double sum = 0;
va_start (arguments, num);
for ( int x = 0; x < num; x++ )
sum += va_arg (arguments, double);
va_end (arguments);
return sum / num;
}
int main() {
cout << average(3, 12.2, 22.3, 4.5) << endl;
cout << average(5, 3.3, 2.2, 1.1, 5.5, 3.3) << endl;
}
va_list arguments;
va_start ( arguments, num );
for ( int x = 0; x < num; x++ )
sum += va_arg ( arguments, double );
va_end ( arguments );
return sum / num;
The C equivalent of cout is a function called printf
printf ("A %s, a %s, a %s: %s!\n", "man",
"plan", "canal", "Panama");
printf ("A percent sign: %%\n");
printf ("An int: %d\n", i);
printf ("A float with 2 decimal digits: %.2f\n",
float_value);
Output:
A man, a plan, a canal: Panama!
A percent sign: %
An int: 3
A float with 2 decimal digits: 3.14
call
opcode
call
instruction places return address in stack
call
instruction
pop
them off the stack (Caller can assume no other registers were modified)
We'll see this code in the following slides:
int myFunc(int a, int b, int c) {
int result;
// some code
return result;
}
int main() {
int x = 1, z = 3;
int retVal = myFunc(x, 123, z);
//...
return 0;
}
push edx ; caller wants edx to be preserved
; int retVal = myFunc(x, 123, z);
push [z] ; Push last param first
push 123 ; push constant 123
push eax ; push first param last
call myFunc ; call the function
add esp, 12 ; clean up stack
pop edx ; restore saved edx value
; return value of myFunc is now available in eax
; (if there is any return value)
This is just before the call
opcode is invoked.
↑ | value of edx | |||
To higher addresses | copy of var z | |||
(to 0xffffffff) | 123 | |||
value of eax (var x) | ← esp | |||
To lower addresses | ||||
(to 0x00000000) | ||||
↓ |
This is just after the call
opcode is invoked.
↑ | value of edx | |||
To higher addresses | copy of var z | |||
(to 0xffffffff) | 123 | |||
value of eax (var x) | ||||
return address | ← esp | |||
To lower addresses | ||||
(to 0x00000000) | ||||
↓ |
We'll see this code in the following slides:
int myFunc(int a, int b, int c) {
int result;
// some code
return result;
}
int main() {
int x = 1, z = 3;
int retVal = myFunc(x, 123, z);
//...
return 0;
}
Before the body of the function:
push ebp
mov ebp, esp
sub esp, 4
THEN, perform body of the function
pop
from the stack (in the reverse of the order in which they were pushed)
mov esp, ebp
pop ebp
ret
With a bit more code in the myFunc()
body:
int myFunc(int a, int b, int c) {
int result;
result = c;
result += b;
return result;
}
int main() {
int x = 1, z = 3;
int retVal = myFunc(x, 123, z);
//...
return 0;
}
section .text
; int myFunc(int a, int b, int c) -- returns c + b (32-bit x86, Intel syntax)
; In:    [ebp+8] = a, [ebp+12] = b, [ebp+16] = c (read from the stack above
;        the return address, once ebp has been set up by the prologue)
; Out:   eax = result
; Saves: ebx and esi are pushed in the prologue and popped in the epilogue
; Stack: one 4-byte local variable (result) at [ebp-4]
myFunc:
; prologue
push ebp ; save old base pointer
mov ebp, esp ; set new base pointer
sub esp, 4 ; save room for 1 local var (result)
push ebx ; save callee-save registers
push esi ; both will be used by myFunc
; subroutine body
mov eax, [ebp+8] ; param 1 (a) to eax -- not used by the body;
; eax is overwritten with the result below
mov esi, [ebp+12] ; param 2 (b) to esi
mov ebx, [ebp+16] ; param 3 (c) to ebx
mov [ebp-4], ebx ; result = c (put ebx into local var)
add [ebp-4], esi ; result += b (add esi into local var)
mov eax, [ebp-4] ; mov contents of local var to eax
; (return value/final result)
; subroutine epilogue
pop esi ; recover callee save registers
pop ebx ; REVERSE of when pushed
mov esp, ebp ; deallocate local var(s)
pop ebp ; restore caller's base pointer
ret ; pop top value from stack, jump there
This is just after the caller invokes the call
opcode.
↑ | value of edx | ↖ ebp | ||
To higher addresses | copy of var z | |||
(to 0xffffffff) | 123 | |||
value of eax (var x) | ||||
return address | ← esp | |||
To lower addresses | ||||
(to 0x00000000) | ||||
↓ |
This is just after the callee invokes the push ebp
opcode.
↑ | value of edx | ↖ ebp | ||
To higher addresses | copy of var z | |||
(to 0xffffffff) | 123 | |||
value of eax (var x) | ||||
return address | ||||
ebp backup | ← esp | |||
To lower addresses | ||||
(to 0x00000000) | ||||
↓ |
This is after the myFunc()
prologue is completed.
↑ | value of edx | |||
To higher addresses | copy of var z | [ebp+16] | ||
(to 0xffffffff) | 123 | [ebp+12] | ||
value of eax (var x) | [ebp+8] | |||
return address | ||||
ebp backup | ← ebp | |||
To lower addresses | local variable | [ebp-4] | ||
(to 0x00000000) | saved value of ebx | |||
↓ | saved value of esi | ← esp |
|
|
|
|
new
, malloc()
, etc.)
void security_hole() {
char buffer[12];
scanf ("%s", buffer); // how C handles input
}
The stack looks like (with sizes in parentheses):
esi (4) | edi (4) | buffer (12) | ebp (4) | ret addr (4) |
All examples are in the slides/code/08-x86/ directory in the github repo
Code we'll see:
g++ -S -m32 -masm=intel
(see next slide for the source code link)
int absolute_value(int x) {
if ( x < 0 ) // if x is negative
x = -x; // negate x
return x; // return x
}
Source code: test_abs.cpp (src)
#include <iostream>
using namespace std;
extern "C" int absolute_value(int x);
int absolute_value(int x) {
if (x<0) // if x is negative
x = -x; // negate x
return x; // return x
}
int main() {
int theValue=0;
cout << "Enter a value: " << endl;
cin >> theValue;
int theResult = absolute_value(theValue);
cout << "The result is: " << theResult << endl;
return 0;
}
No external source code; this is how we might write it ourselves.
; Hand-written absolute value: the single argument x is read from [ebp+8]
; and |x| is returned in eax.
; Standard prologue
push ebp
mov ebp, esp
; procedure body
mov eax, [ebp + 8] ; eax <- x
cmp eax, 0 ; compare x with 0 (sets flags for the signed test below)
jge end_of_proc ; if x >= 0 there is nothing to do: skip the negation
neg eax ; x < 0: eax <- -x
end_of_proc:
; Standard epilogue
mov esp, ebp
pop ebp
ret
clang++ -m32 -S test_abs.cpp -o test_abs-non-intel.s
g++ -m32 -S test_abs.cpp -o test_abs-non-intel.s
Note the source / destination order is reversed! And lots of other differences...
Source code: test_abs-non-intel.s (src)
absolute_value:
pushl %eax
movl 8(%esp), %eax
movl %eax, (%esp)
cmpl $0, (%esp)
jge .LBB1_2
movl $0, %eax
subl (%esp), %eax
movl %eax, (%esp)
.LBB1_2:
movl (%esp), %eax
popl %edx
ret
g++ -S
produces
movl
, pushl
, cmpl
, subl
, popl
, etc.
$0
is the constant 0
8(%ebp)
is [ebp+8]
g++ -S
uses by default
-mllvm --x86-asm-syntax=intel
flags
clang++ -m32 -mllvm --x86-asm-syntax=intel -S \
test_abs.cpp -o test_abs.s
-masm=intel
flags:
g++ -m32 -masm=intel -S test_abs.cpp -o test_abs.s
This is with the -masm=intel
flags
Source code: test_abs.s (src)
absolute_value:
push ebp
mov ebp, esp
cmp DWORD PTR [ebp+8], 0
jns .L2
neg DWORD PTR [ebp+8]
.L2:
mov eax, DWORD PTR [ebp+8]
pop ebp
ret
jns
means jump if not sign, i.e. jump when the sign flag is clear (here: x - 0 is non-negative, so x >= 0)
-masm=intel
flag-mllvm --x86-asm-syntax=intel
flags
Source code: test_abs_c.c (src)
#include <stdio.h>
int absolute_value(int x) {
if ( x < 0 ) // if x is negative
x = -x; // negate x
return x; // return x
}
int main() {
int theValue=0;
printf ("Enter a value: \n");
scanf ("%d", &theValue);
int theResult = absolute_value(theValue);
printf ("The result is: %d\n", theResult);
return 0;
}
(see next slide for the source code link)
int max(int x, int y) {
int theMax;
if (x > y) // if x > y then x is max
theMax = x;
else // else y is the max
theMax = y;
return theMax; // return the max
}
Source code: test_max.cpp (src)
#include <iostream>
using namespace std;
extern "C" int max(int x, int y);
// the max function from the previous slide
int main() {
int theValue1=0, theValue2=0;
cout << "Enter value 1: " << endl;
cin >> theValue1;
cout << "Enter value 2: " << endl;
cin >> theValue2;
int theResult = max(theValue1, theValue2);
cout << "The result is: " << theResult << endl;
return 0;
}
Using: g++ -S -m32 -masm=intel
Source code: test_max.s (src)
max:
push ebp
mov ebp, esp
sub esp, 16
mov eax, DWORD PTR [ebp+8]
cmp eax, DWORD PTR [ebp+12]
jle .L2
mov eax, DWORD PTR [ebp+8]
mov DWORD PTR [ebp-4], eax
jmp .L3
.L2:
mov eax, DWORD PTR [ebp+12]
mov DWORD PTR [ebp-4], eax
.L3:
mov eax, DWORD PTR [ebp-4]
leave
ret
Using: g++ -S -m32 -masm=intel
and -O2
Source code: test_max-O2.s (src)
max:
mov eax, DWORD PTR [esp+4]
mov edx, DWORD PTR [esp+8]
cmp edx, eax
cmovge eax, edx
ret
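The cmovg opcode (conditional move if greater than) copies its source operand into its destination only when the preceding comparison found "greater than"; cmovge does the same for "greater than or equal to". Here, cmp edx, eax followed by cmovge eax, edx copies y (edx) into eax when y >= x, so eax ends up holding the larger of the two values.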
Using: g++ -S -m32 -masm=intel
Source code: test_max-noextern.s (src)
_Z3maxii:
push ebp
mov ebp, esp
sub esp, 16
mov eax, DWORD PTR [ebp+8]
cmp eax, DWORD PTR [ebp+12]
jle .L2
mov eax, DWORD PTR [ebp+8]
mov DWORD PTR [ebp-4], eax
jmp .L3
.L2:
mov eax, DWORD PTR [ebp+12]
mov DWORD PTR [ebp-4], eax
.L3:
mov eax, DWORD PTR [ebp-4]
leave
ret
(see next slide for the source code link)
// Returns true when the two C strings hold exactly the same characters
// (including ending at the same position), false otherwise.
// Both pointers are dereferenced unconditionally, so each must point to a
// valid NUL-terminated string.
bool compare_string (const char *theStr1,
                     const char *theStr2) {
    // while *theStr1 is not the NUL terminator
    // and the current corresponding bytes are equal
    // ('\0' is the character terminator; NULL is the null *pointer* constant
    //  and must not be compared against a char)
    while( (*theStr1 != '\0')
           && (*theStr1 == *theStr2) ) {
        theStr1++; // increment the pointers to
        theStr2++; // the next char / byte
    }
    // The loop stopped either at the end of theStr1 or at the first
    // mismatch; the strings are equal only if both bytes match here
    // (i.e. both are at their terminators).
    return (*theStr1==*theStr2);
}
Source code: test_string_compare.cpp (src)
#include <iostream>
#include <string>
using namespace std;
extern "C" bool compare_string(const char* theStr1,
const char* theStr2);
// code for compare_string here
int main() {
string theValue1, theValue2;
cout << "Enter string 1: " << endl;
cin >> theValue1;
cout << "Enter string 2: " << endl;
cin >> theValue2;
bool theResult = compare_string(theValue1.c_str(),
theValue2.c_str());
cout << "The result is: " << theResult << endl;
return 0;
}
Using: g++ -S -m32 -masm=intel
Source code: test_string_compare.s (src)
compare_string:
push ebp
mov ebp, esp
jmp .L2
.L5:
add DWORD PTR [ebp+8], 1
add DWORD PTR [ebp+12], 1
Using: g++ -S -m32 -masm=intel
Source code: test_string_compare.s (src)
.L2:
mov eax, DWORD PTR [ebp+8]
movzx eax, BYTE PTR [eax]
test al, al
je .L3
mov eax, DWORD PTR [ebp+8]
movzx edx, BYTE PTR [eax]
mov eax, DWORD PTR [ebp+12]
movzx eax, BYTE PTR [eax]
cmp dl, al
jne .L3
mov eax, 1
jmp .L4
Using: g++ -S -m32 -masm=intel
Source code: test_string_compare.s (src)
.L3:
mov eax, 0
.L4:
test al, al
jne .L5
mov eax, DWORD PTR [ebp+8]
movzx edx, BYTE PTR [eax]
mov eax, DWORD PTR [ebp+12]
movzx eax, BYTE PTR [eax]
cmp dl, al
sete al
pop ebp
ret
(see next slide for the source code link)
int fib(unsigned int n) {
if ((n==0) || (n==1))
return 1;
return fib(n-1) + fib(n-2);
}
Source code: test_fib.cpp (src)
#include <iostream>
using namespace std;
extern "C" int fib(unsigned int n);
int fib(unsigned int n) {
if ((n==0) || (n==1))
return 1;
return fib(n-1) + fib(n-2);
}
int main() {
int theValue = 0;
cout << "Enter value for fib(): " << endl;
cin >> theValue;
int theResult = fib(theValue);
cout << "The result is: " << theResult << endl;
return 0;
}
Using: g++ -S -m32 -masm=intel
Source code: test_fib.s (src)
fib:
push ebp
mov ebp, esp
push ebx
sub esp, 20
cmp DWORD PTR [ebp+8], 0
je .L2
cmp DWORD PTR [ebp+8], 1
jne .L3
.L2:
mov eax, 1
jmp .L4
Using: g++ -S -m32 -masm=intel
Source code: test_fib.s (src)
.L3:
mov eax, DWORD PTR [ebp+8]
sub eax, 1
mov DWORD PTR [esp], eax
call fib
mov ebx, eax
mov eax, DWORD PTR [ebp+8]
sub eax, 2
mov DWORD PTR [esp], eax
call fib
add eax, ebx
.L4:
add esp, 20
pop ebx
pop ebp
ret
|
movb
, movw
, or movl
movb
, that instruction moves the least significant 8 bits of the 32-bit quantity
int
to a short
, you just take the lowest 16 bits, which means you use a movw
instead of a movl