Overview
We are given a bunch of files:
First run:
$ ./main
Usage: ./main flag_checker.tfl <flag>
When we attempt to strace the binary, we see a lot of things being read and checked:
openat(AT_FDCWD, "romfs/damocles.bin", O_RDONLY) = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=512, ...}) = 0
fstat(5, {st_mode=S_IFREG|0644, st_size=512, ...}) = 0
lseek(5, 0, SEEK_SET) = 0
read(5, "\250c\6\375ip\330\7\3B\206\326\214\373\263\30\5\265\216L\212|\327\233S,\224?\230\377\272W"..., 512) = 512
lseek(5, 512, SEEK_SET) = 512
close(5) = 0
openat(AT_FDCWD, "/proc/self/maps", O_RDONLY) = 5
fstat(5, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(5, "00400000-00402000 r--p 00000000 "..., 1024) = 1024
mmap(NULL, 344064, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8b0c8a7000
munmap(0x7f8b0c8a7000, 344064) = 0
openat(AT_FDCWD, "/proc/self/maps", O_RDONLY) = 6
fstat(6, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(6, "00400000-00402000 r--p 00000000 "..., 1024) = 1024
brk(0x222b000) = 0x222b000
openat(AT_FDCWD, "/proc/self/maps", O_RDONLY) = 7
fstat(7, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(7, "00400000-00402000 r--p 00000000 "..., 1024) = 1024
openat(AT_FDCWD, "romfs/blob", O_RDONLY) = 8
fstat(8, {st_mode=S_IFREG|0644, st_size=1024, ...}) = 0
fstat(8, {st_mode=S_IFREG|0644, st_size=1024, ...}) = 0
lseek(8, 0, SEEK_SET) = 0
read(8, "\237a\355\314 \234\335\205c\302cyl\236\323tN\326\336\254J\264\340\2\244\1\317!\346\17G-"..., 1024) = 1024
lseek(8, 1024, SEEK_SET) = 1024
close(8) = 0
getrandom("\xab\xf2\x27\x42\x11\xdc\xfc\x9c\xae\xcb\x15\x44\x88\x93\xfc\x70\x08\xad\x62\xa0\xeb\x0d\x67\x13\x50\xb1\x47\x8d\x8d\x68\x61\x42"..., 128, 0) = 128
getrandom("\xa7\xc9\x9e\xf2\x28\xab\x33\xd7\x14\xcd\x7d\x5c\x6c\x51\xbc\x66\xb6\xec\x08\x6e\x48\xe2\x37\xd1\x67\x9a\xc1\x96\xea\xce\xd0\x63"..., 128, 0) = 128
openat(AT_FDCWD, "romfs/dragonfly.bin", O_RDONLY) = 8
fstat(8, {st_mode=S_IFREG|0644, st_size=256, ...}) = 0
fstat(8, {st_mode=S_IFREG|0644, st_size=256, ...}) = 0
lseek(8, 0, SEEK_SET) = 0
read(8, "\231<6\241\204\0\337z\266\37\344q47\313\232\263)G\326\264\271\2741\234\365\200\220C\275z\337"..., 256) = 256
lseek(8, 256, SEEK_SET) = 256
close(8) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x4), ...}) = 0
write(1, "Incorrect!\n", 11Incorrect!
) = 11
lseek(7, -691, SEEK_CUR) = 333
lseek(6, -691, SEEK_CUR) = 333
lseek(5, -802, SEEK_CUR) = 222
lseek(4, -691, SEEK_CUR) = 333
lseek(3, -802, SEEK_CUR) = 222
exit_group(0) = ?
+++ exited with 0 +++
However, if we try to step through the binary, we exit with code 0x1; the binary seems to detect breakpoints.
Static analysis
The first step was a lot of static reverse engineering in IDA Pro. I chose IDA because of public function signatures and as it seemed to decompile the code best.
Apart from the flag_checker.tfl, there’s also the hello_world.tfl with completely different behavior. It seems like the flag checking part is embedded as instructions for a custom VM inside of the tinfoil-protected file.
There is a huge function at 0x404E90 that defined a list of functions addresses, probably the opcodes for the VM. The binary seems to be written in an object-oriented language and guessing some types, I was able to guess what most of these functions do:
The main control flow works like this:
Dynamic analysis
During the static analysis, I already realized that this was going to be very hard to pull of statically. I don’t know what exactly happens in every function except for an approximate guess.
This is why I decided to hook the VM execution with libdebug.
For this, I did the following cursed things:
- Export function names of the VM instructons from IDA
- Lots of custom formatting to have a nice output
- Tracing back the stack
This looks like this:
#!/usr/bin/env python3
from libdebug import debugger
import sys
from pwn import enhex, u64
from string import printable
import json
from rich import print
functions=json.load(open("./funcs.json")) # load functions from IDA export
bname = sys.argv[-1] if len(sys.argv) > 1 else "flag_checker.tfl"
d = debugger(argv=["./main", bname, "dach2026{some_flag_1337}"]) #}
# resolve memory, if it's a pointer
def res(x):
try:
return d.memory.telescope(x)[-1]
except ValueError:
return x
# visualize pointer (to ascii or hex, if not printable)
def vis(x):
if isinstance(x, int):
try:
s = x.to_bytes((x.bit_length() + 7) // 8, 'little')
if any(chr(y) not in printable for y in s):
return hex(x)
return s
except UnicodeDecodeError:
return hex(x)
return x[:20]
# get a nice line (lol, not even in use anymore)
def hr(x=""):
print("-"*len(x) if len(x) else "-"*20)
print(x)
print("-"*len(x) if len(x) else "-"*20)
# resolve fn name from IDA export
def getname(addr):
for k,v in functions.items():
if int(k,16) == addr:
return v
return hex(addr)
io = d.run()
#aes_call = d.bp(0x410545, hardware=True, file="binary")
#sthsth_crypto = d.bp(0x410530, hardware=True, file="binary")
#after_aes = d.bp(0x0410553, hardware=True, file="binary")
#vm_instr = d.bp(0x4051E3, hardware=True, file="binary")
vm_call = d.bp(0x405211, hardware=True, file="binary")
d.cont()
def loop():
global dl
if vm_call.hit_on(d):
# main rip / instr / regs view
print(
f"[yellow]prog+{hex(res(d.regs.rdx))[2:].ljust(2,"0")}[/yellow]\t" # offset
f"[green]{hex(res(d.regs.rsi & 0xff))[2:].ljust(2,"0")}[/green]\t"
f"[cyan]{getname(d.regs.rax).rjust(30, " ")}[/cyan]\t" # instr
f"{vis(res(d.regs.rcx))}\t", # args
end="")
ctx = d.regs.rbx
# visually compare stuff
if getname(d.regs.rax) == "compare":
instr = d.regs.rsi
slots = u64(d.memory[ctx + 0x30, 8])
operand = u64(d.memory[instr + 8, 8]) & 0xffffffff
opcode = u64(d.memory[instr + 0, 8])
opcode_hi = (opcode >> 32) & 0xffffffff
print()
dl = []
for idx in [operand, opcode_hi]:
obj = u64(d.memory[slots + idx*8, 8])
if obj:
data = u64(d.memory[obj + 0x10, 8])
size = u64(d.memory[obj + 0x18, 8])
buf = d.memory[data, size]
dl.append(enhex(buf))
for c in range(len(dl[0])):
if dl[0][c] == dl[1][c]:
print(f"[green]{dl[0][c]}[/green]", end="")
else:
print(f"[red]{dl[0][c]}[/red]", end="")
print()
# print stack
stack_begin = u64(d.memory[ctx + 0x18, 8])
stack_end = u64(d.memory[ctx + 0x20, 8])
p = stack_begin
i = 0
while p < stack_end:
obj = u64(d.memory[p, 8])
data = u64(d.memory[obj + 0x10, 8])
size = u64(d.memory[obj + 0x18, 8])
if data:
buf = d.memory[data, size]
try:
hexdata = buf.decode()
except:
hexdata = enhex(buf)[:16]
print(f"s[{i}]({size})={hexdata}, ", end="")
p += 8
i += 1
print()
# make sure we can hit the BP multiple times
d.cont()
return loop()
loop()
For the function export, I hacked together a cursed .idc script:
#include <idc.idc>
static main()
{
auto f, name, fp, first;
fp = fopen("funcs.json", "w");
if (fp == 0)
{
Warning("x_x");
return;
}
fprintf(fp, "{\n");
first = 1;
for (f = get_next_func(0); f != BADADDR; f = get_next_func(f))
{
name = get_func_name(f);
if (!first)
fprintf(fp, ",\n");
fprintf(fp, " \"0x%X\": \"%s\"", f, name);
first = 0;
}
fprintf(fp, "\n}\n");
fclose(fp);
Message("done!");
}
Putting it all together
The data from the dynamic analysis showed pretty clearly that my input was bit-shifted by 1 multiple times after a bitwise NOT.
While hooking the comparison function, I noticed that the first parts of the result of my input and “the flag” were a match.
As the numbers kept getting smaller and smaller with each bit shift, I came to the conclusion, that the last round must all be the same, as any negated printable ascii character shifted 7 times will just be 1:
>>> import string
>>> all((~ord(x) % 255 >> 7) == 1 for x in string.printable)
True
This is the program logic that I came up with in the end:
So, what does this mean? Based on the existing matching hash part, we know the length of a hash.
This leaves us with the following chunks:
chunks = [
'993c36a18400df7ab61fe4713437cb9ab32947d6b4b9bc319cf5809043bd7adf', # >> 7
'f4b6e9112c04ed738a8181131c3b8c58f2cbd5fd5df46481e4b9ad0c68fd0188', # >> 6
'54bc8a04e5277262430193b8be736c770a3b089e4863ac81b9df6b870900fbe4', # >> 5
'4e7917200611ebd09a459cb0d898bf91377caf65a7bf0a684a1ad81ed71706b1', # >> 4
'b76af9c2089cdcaf26432ce734905bb4392974de7d89f9def83caa5c0a7b1938', # >> 3
'bc9ac2ac2c4e4b2a059ec5084aed7452fd9074c62f54640c9e1d6031116e466d', # >> 2
'310b06b46440231293ae39856612c634d3be3f2f6513582e86fe4a4260973893', # >> 1
'561a856949620126623bc884472aa4e8236b770faf1c70e362bc8b8fa8559d70' # >> 0
]
For every round, we’ll have to brute \(2^{29}\) possible bit combinations, which can be a bit of a pain. Once we reach the last chunk, we should be able to apply a bitwise NOT to the original input again and then get the flag.
I initially wrote a script in python, then just gave my implementation without any other context to an LLM and asked to rewrite it in rust.
It definitely cooked something up; the resulting rust code took 10 seconds to finish while my original python code had a predicted runtime of 30 minutes:
use rayon::prelude::*;
use sha2::{Digest, Sha256};
use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
const CHUNKS: [&str; 8] = [
"993c36a18400df7ab61fe4713437cb9ab32947d6b4b9bc319cf5809043bd7adf",
"f4b6e9112c04ed738a8181131c3b8c58f2cbd5fd5df46481e4b9ad0c68fd0188",
"54bc8a04e5277262430193b8be736c770a3b089e4863ac81b9df6b870900fbe4",
"4e7917200611ebd09a459cb0d898bf91377caf65a7bf0a684a1ad81ed71706b1",
"b76af9c2089cdcaf26432ce734905bb4392974de7d89f9def83caa5c0a7b1938",
"bc9ac2ac2c4e4b2a059ec5084aed7452fd9074c62f54640c9e1d6031116e466d",
"310b06b46440231293ae39856612c634d3be3f2f6513582e86fe4a4260973893",
"561a856949620126623bc884472aa4e8236b770faf1c70e362bc8b8fa8559d70",
];
const LIMIT: u64 = 1 << 29;
const N: usize = 29;
fn solve_block(block: usize, target: &[u8; 32], initial: &[u64; N]) -> Option<[u64; N]> {
// Precompute which bits in initial are already set (mask of locked bits per position)
// initial[e] contributes upper bits; we OR in bit e of i as the new LSB
let found = AtomicBool::new(false);
let result_i = AtomicI64::new(-1);
// Chunk the search space across threads; each thread owns a contiguous range
// Use chunk_size tuned for ~4K iterations per task to balance overhead vs granularity
let chunk_size: u64 = 4096;
let num_chunks = (LIMIT + chunk_size - 1) / chunk_size;
(0..num_chunks).into_par_iter().for_each(|chunk_idx| {
if found.load(Ordering::Relaxed) {
return;
}
let start_i = chunk_idx * chunk_size;
let end_i = (start_i + chunk_size).min(LIMIT);
let mut buf = [0u8; N];
for i in start_i..end_i {
if found.load(Ordering::Relaxed) {
return;
}
// Build the 29-byte input:
// start[e] = initial[e] | ((i >> e) & 1)
for e in 0..N {
buf[e] = (initial[e] | ((i >> e) & 1)) as u8;
}
let hash = Sha256::digest(&buf);
if hash.as_slice() == target {
result_i.store(i as i64, Ordering::SeqCst);
found.store(true, Ordering::SeqCst);
return;
}
}
});
let i = result_i.load(Ordering::SeqCst);
if i < 0 {
return None;
}
let i = i as u64;
// Reconstruct the winning `start` array
let mut start = [0u64; N];
for e in 0..N {
start[e] = initial[e] | ((i >> e) & 1);
}
Some(start)
}
fn main() {
// Match Python: initial = [2]*29
let mut initial = [2u64; N];
let mut start_res = [2u64; N];
for block in 1..8usize {
let raw_hex = CHUNKS[block];
let raw_bytes = hex::decode(raw_hex).expect("Invalid hex in CHUNKS");
let mut target = [0u8; 32];
target.copy_from_slice(&raw_bytes);
println!("Searching block {block}...");
let start_time = std::time::Instant::now();
start_res = match solve_block(block, &target, &initial) {
Some(start) => {
let elapsed = start_time.elapsed();
println!("solved {block} in {elapsed:.2?}");
// initial = [o << 1 for o in start]
println!("initial: {start:?}");
for e in 0..N {
initial[e] = start[e] << 1;
}
start
}
None => {
println!("Block {block}: no solution found in [0, {LIMIT})");
break;
}
};
}
let s: String = start_res
.iter()
.map(|&n| (!n) as u8 as char)
.collect();
println!("\n------------> {} <---------------", s); // flag :D
}
Finally, this is how it all looks in action:
Flag: