Recently I wanted to demonstrate the effects of the ‘volatile’ keyword on compiler optimization to a former colleague. I therefore needed to show him the assembly code generated by gcc. This can be easily achieved by using ‘gcc -S test.c’. The resulting test.s file will then contain the assembly code, e.g.:
.file "loop.c"
.text
.globl main
.type main, @function
main:
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
…
This does the job but it’s not particularly easy to find your way around in the code (at least not for me) so I was looking for ways to improve the output. The first obvious choice was to use ‘-fverbose-asm’, e.g. ‘gcc -S -fverbose-asm test.c’:
main:
leal 4(%esp), %ecx #,
andl $-16, %esp #,
pushl -4(%ecx) #
pushl %ebp #
movl %esp, %ebp #,
pushl %ecx #
subl $16, %esp #,
movl $6, -8(%ebp) #, dummy
jmp .L2 #
.L3:
subl $1, -8(%ebp) #, dummy
.L2:
cmpl $0, -8(%ebp) #, dummy
jg .L3 #,
addl $16, %esp #,
popl %ecx #
popl %ebp #
leal -4(%ecx), %esp #,
ret
This adds a few comments but I wasn’t happy with it. With a bit of googling I found a better way of doing it: ‘gcc -c -g -Wa,-ahl=test.s test.c’. This produces the following output in test.s:
12 main:
13 .LFB2:
14 .file 1 "test.c"
1:test.c **** int main(void)
2:test.c **** {
15 .loc 1 2 0
16 0000 8D4C2404 leal 4(%esp), %ecx
17 .LCFI0:
18 0004 83E4F0 andl $-16, %esp
19 0007 FF71FC pushl -4(%ecx)
20 .LCFI1:
21 000a 55 pushl %ebp
22 .LCFI2:
23 000b 89E5 movl %esp, %ebp
24 .LCFI3:
25 000d 51 pushl %ecx
26 .LCFI4:
27 000e 83EC10 subl $16, %esp
28 .LCFI5:
3:test.c **** int dummy;
4:test.c **** for( dummy = 6; dummy > 0; dummy-- );
29 .loc 1 4 0
30 0011 C745F806 movl $6, -8(%ebp)
30 000000
31 0018 EB04 jmp .L2
32 .L3:
33 001a 836DF801 subl $1, -8(%ebp)
34 .L2:
35 001e 837DF800 cmpl $0, -8(%ebp)
36 0022 7FF6 jg .L3
5:test.c **** }
37 .loc 1 5 0
38 0024 83C410 addl $16, %esp
39 0027 59 popl %ecx
40 0028 5D popl %ebp
41 0029 8D61FC leal -4(%ecx), %esp
42 002c C3 ret
That looks quite nice already and for example you can now clearly see where the loop starts and ends – great!
An alternative way is to use ‘objdump’. Interleaving the source code with the disassembly only works if you have compiled the code with debug information, e.g. ‘gcc -g test.c -o test.o’. You can then use ‘objdump -dS test.o’ to get some nice looking assembly output:
08048344 <main>:
int main(void)
{
8048344: 8d 4c 24 04 lea 0x4(%esp),%ecx
8048348: 83 e4 f0 and $0xfffffff0,%esp
804834b: ff 71 fc pushl -0x4(%ecx)
804834e: 55 push %ebp
804834f: 89 e5 mov %esp,%ebp
8048351: 51 push %ecx
8048352: 83 ec 10 sub $0x10,%esp
int dummy;
for( dummy = 6; dummy > 0; dummy-- );
8048355: c7 45 f8 06 00 00 00 movl $0x6,-0x8(%ebp)
804835c: eb 04 jmp 8048362 <main+0x1e>
804835e: 83 6d f8 01 subl $0x1,-0x8(%ebp)
8048362: 83 7d f8 00 cmpl $0x0,-0x8(%ebp)
8048366: 7f f6 jg 804835e <main+0x1a>
}
8048368: 83 c4 10 add $0x10,%esp
804836b: 59 pop %ecx
804836c: 5d pop %ebp
804836d: 8d 61 fc lea -0x4(%ecx),%esp
8048370: c3 ret
Note: if you declared ‘int dummy’ in the sample code as volatile the resulting assembly code would look like this:
08048344 <main>:
int main(void)
{
8048344: 8d 4c 24 04 lea 0x4(%esp),%ecx
8048348: 83 e4 f0 and $0xfffffff0,%esp
804834b: ff 71 fc pushl -0x4(%ecx)
804834e: 55 push %ebp
804834f: 89 e5 mov %esp,%ebp
8048351: 51 push %ecx
8048352: 83 ec 10 sub $0x10,%esp
int volatile dummy;
for( dummy = 6; dummy > 0; dummy-- );
8048355: c7 45 f8 06 00 00 00 movl $0x6,-0x8(%ebp)
804835c: eb 09 jmp 8048367 <main+0x23>
804835e: 8b 45 f8 mov -0x8(%ebp),%eax
8048361: 83 e8 01 sub $0x1,%eax
8048364: 89 45 f8 mov %eax,-0x8(%ebp)
8048367: 8b 45 f8 mov -0x8(%ebp),%eax
804836a: 85 c0 test %eax,%eax
804836c: 7f f0 jg 804835e <main+0x1a>
}
804836e: 83 c4 10 add $0x10,%esp
8048371: 59 pop %ecx
8048372: 5d pop %ebp
8048373: 8d 61 fc lea -0x4(%ecx),%esp
8048376: c3 ret
BTW, here’s a good article about the use of volatile.
I just wanna thank you for solving my problem.
good stuff.
Thanks
The same can also be achieved with the following command:
gcc -c -g -Wa,-a,-ad test.c > test.lst
wget http://utils.rackexp.org/toys/showasm.py
It shows the source code as well as the assembly.
#1: #include <stdio.h>
#2: #include <stdlib.h>
#3:
#4: int main(int argc,char **argv)
#5: {
.loc 1 5 0
.cfi_startproc
pushq %rbp
.LCFI0:
.cfi_def_cfa_offset 16
movq %rsp, %rbp
.cfi_offset 6, -16
.LCFI1:
.cfi_def_cfa_register 6
subq $32, %rsp
movl %edi, -20(%rbp)
movq %rsi, -32(%rbp)
#6: int n = atoi(argv[1]);
.loc 1 6 0
movq -32(%rbp), %rax
addq $8, %rax
movq (%rax), %rax
movq %rax, %rdi
call atoi
movl %eax, -4(%rbp)
#7: int b64len = (((n+2)/3)*4)+1;
.loc 1 7 0
movl -4(%rbp), %eax
leal 2(%rax), %ecx
movl $1431655766, %edx
movl %ecx, %eax
imull %edx
movl %ecx, %eax
sarl $31, %eax
movl %edx, %ecx
subl %eax, %ecx
movl %ecx, %eax
sall $2, %eax
addl $1, %eax
movl %eax, -8(%rbp)
#8: printf("%i bytes require %i bytes of b64 data to encode\n",n,b64len);
.loc 1 8 0
movl $.LC0, %eax
movl -8(%rbp), %edx
movl -4(%rbp), %ecx
movl %ecx, %esi
movq %rax, %rdi
movl $0, %eax
call printf
#9: return 0;
.loc 1 9 0
movl $0, %eax
#10: }
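Line 7 for some reason is implying that multiplying by 1431655766 is somehow equivalent to dividing by three. Must be a new compiler trick to avoid dividing by a constant. (1431655766 is 0x55555556, roughly 2^32/3, so the upper 32 bits of the 64-bit product left in %edx hold the quotient.)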
piping output to c++filt will add more human annotations 😉