Skip to content

Commit f6fbe42

Browse files
committed
Initial public commit
0 parents  commit f6fbe42

33 files changed

+3894
-0
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/*.sublime-*
2+
/build*
3+
/install

.gitmodules

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[submodule "subprojects/rellume"]
2+
path = subprojects/rellume
3+
url = https://github.com/aengelke/rellume.git
4+
[submodule "subprojects/fadec"]
5+
path = subprojects/fadec
6+
url = https://github.com/aengelke/fadec.git

LICENSE

+456
Large diffs are not rendered by default.

README.md

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Instrew — LLVM-based Dynamic Binary Instrumentation
2+
3+
Instrew is a performance-targeted transparent dynamic binary rewriter/instrumenter based on LLVM targeting x86-64. The original code is lifted to LLVM-IR using [Rellume](https://github.com/aengelke/rellume), where it can be modified and from which new machine code is generated using LLVM's MCJIT compiler.
4+
5+
### Architecture
6+
7+
Instrew implements a two-process client/server architecture: the light-weight client contains the guest address space as well as the code cache and controls execution, querying rewritten objects as necessary from the server. The server performs lifting (requesting instruction bytes from the client when required), instrumentation, and code generation and sends back an ELF object file. When receiving a new object file, the client resolves missing symbols and applies relocations.
8+
9+
### Publications
10+
11+
- Alexis Engelke and Martin Schulz. 2020. Instrew: Leveraging LLVM for High Performance Dynamic Binary Instrumentation. In 16th ACM SIGPLAN/SIGOPS International Conference on Virtual Execution Environments (VEE ’20), March 17, 2020, Lausanne, Switzerland. *Accepted. Link will follow.*
12+
13+
### License
14+
Instrew is licensed under LGPLv2.1+.

client/common.h

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
2+
#ifndef COMMON_H
3+
#define COMMON_H
4+
5+
#include <stdarg.h>
6+
#include <stdbool.h>
7+
#include <stddef.h>
8+
#include <stdint.h>
9+
10+
#include <linux/errno.h>
11+
#include <linux/fcntl.h>
12+
#include <linux/time.h>
13+
#include <linux/unistd.h>
14+
15+
#define ssize_t intptr_t
16+
#define off_t intptr_t
17+
18+
extern char **environ;
19+
20+
long syscall(long, long, long, long, long, long, long);
21+
__attribute__((noreturn)) void _exit(int status);
22+
int getpid(void);
23+
24+
int clock_gettime(int clk_id, struct timespec* tp);
25+
26+
int open(const char* pathname, int flags, int mode);
27+
int openat(int dirfd, const char* pathname, int flags, int mode);
28+
off_t lseek(int fd, off_t offset, int whence);
29+
ssize_t read(int fd, void* buf, size_t count);
30+
ssize_t write(int fd, const void* buf, size_t count);
31+
int close(int fd);
32+
int fcntl(int fd);
33+
34+
ssize_t read_full(int fd, void* buf, size_t nbytes);
35+
ssize_t write_full(int fd, const void* buf, size_t nbytes);
36+
37+
// sys/auxv.h
38+
unsigned long int getauxval(unsigned long int __type);
39+
40+
// sys/mman.h
41+
void* mmap(void* addr, size_t length, int prot, int flags, int fd,
42+
off_t offset);
43+
int munmap(void* addr, size_t length);
44+
int mprotect(void* addr, size_t len, int prot);
45+
46+
// stdio.h
47+
int vsnprintf(char* str, size_t size, const char* restrict format, va_list args);
48+
int snprintf(char* str, size_t size, const char* restrict format, ...);
49+
int vdprintf(int fd, const char* restrict format, va_list args);
50+
int dprintf(int fd, const char* restrict format, ...);
51+
int printf(const char* restrict format, ...);
52+
int puts(const char* s);
53+
54+
// strings.h
55+
size_t strlen(const char* s);
56+
int strcmp(const char* s1, const char* s2);
57+
int strncmp(const char* s1, const char* s2, size_t n);
58+
char* strchr(const char* s, int c);
59+
void* memset(void* s, int c, size_t n);
60+
int memcmp(const void* s1, const void* s2, size_t n);
61+
void* memcpy(void* dest, const void* src, size_t n);
62+
63+
int execve(const char* filename, const char* const argv[], const char* const envp[]);
64+
int dup2(int oldfd, int newfd);
65+
int pipe2(int pipefd[2], int flags);
66+
int __clone(int (*func)(void *), void *stack, int flags, void *arg, ...);
67+
68+
// Turn tokens into a string literal. STRINGIFY macro-expands its argument
// before stringifying; STRINGIFY_ARG stringifies the argument as written.
#define STRINGIFY_ARG(x) #x
#define STRINGIFY(x) STRINGIFY_ARG(x)

// Concatenate two tokens. PASTE macro-expands its arguments before pasting;
// PASTE_ARGS pastes them as written.
#define PASTE_ARGS(a,b) a ## b
#define PASTE(a,b) PASTE_ARGS(a, b)

// Round v down / up to a multiple of a. The mask arithmetic is only correct
// when a is a power of two. ALIGN_UP evaluates a twice, so a must be free of
// side effects. Caution: if a has an unsigned type narrower than v, the
// complemented mask is zero-extended and clears the upper bits of v.
#define ALIGN_DOWN(v,a) ((v) & ~((a)-1))
// Every use of a is parenthesized so that expression arguments such as
// `2 << 1` or `cond ? 4 : 8` are not re-associated with the `- 1`.
#define ALIGN_UP(v,a) (((v) + ((a)-1)) & ~((a)-1))

// Branch-prediction hints: tell the compiler that x is expected to be 1
// (LIKELY) or 0 (UNLIKELY). The macro evaluates to the value of x.
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)

// Emit an asm statement whose text is the stringified argument list.
#define ASM_BLOCK(...) __asm__(#__VA_ARGS__)

// True if a, taken as an integer, lies above the start of the topmost 4 KiB
// page of the address space. The ELF loader applies this to mmap results to
// detect failure; presumably those are small negative error codes returned in
// place of a pointer -- NOTE(review): confirm against the syscall wrappers.
#if __SIZEOF_POINTER__ == 8
#define BAD_ADDR(a) (((uintptr_t) (a)) > 0xfffffffffffff000ULL)
#else
#define BAD_ADDR(a) (((uintptr_t) (a)) > 0xfffff000UL)
#endif
87+
88+
#endif

client/elf-loader.c

+248
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
2+
#include <common.h>
3+
#include <elf.h>
4+
#include <linux/fcntl.h>
5+
#include <linux/fs.h>
6+
#include <linux/mman.h>
7+
8+
#include <elf-loader.h>
9+
10+
11+
// Accept only ELF headers whose machine type is x86-64.
#define elf_check_arch(ehdr) ((ehdr)->e_machine == EM_X86_64)

// Page granularity used for all mappings made by this loader (4 KiB).
#define PAGESIZE ((size_t) 0x1000)
15+
16+
// Read the program header table of an ELF file into a freshly mapped page.
//
// elf_ex: ELF header that was already read from fd; supplies e_phentsize,
//         e_phnum and e_phoff.
// fd:     descriptor of the ELF file. Its file offset is changed.
//
// Returns a pointer to a PAGESIZE-sized anonymous mapping whose start holds
// the e_phnum program headers, or NULL on any failure. The caller owns the
// mapping and releases it with munmap(ptr, PAGESIZE).
static Elf_Phdr* load_elf_phdrs(Elf_Ehdr* elf_ex, int fd) {
    // The on-disk entry layout must match the structure used by this loader.
    if (elf_ex->e_phentsize != sizeof(Elf_Phdr))
        return NULL;

    if (elf_ex->e_phnum < 1 || elf_ex->e_phnum > 65536U / sizeof(Elf_Phdr))
        return NULL;

    // Exactly one page is allocated below, so the table has to fit into it.
    size_t size = sizeof(Elf_Phdr) * elf_ex->e_phnum;
    if (size > PAGESIZE)
        return NULL;

    Elf_Phdr* phdata = mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE,
                            MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if (BAD_ADDR(phdata))
        return NULL;

    // Treat every negative result as failure, as the other call sites in this
    // file do; testing for exactly -1 would let other negative error values
    // pass and return a page that was never filled.
    if (lseek(fd, elf_ex->e_phoff, SEEK_SET) < 0 ||
        read_full(fd, phdata, size) < 0) {
        munmap(phdata, PAGESIZE);
        return NULL;
    }

    return phdata;
}
53+
54+
// Compute the number of bytes of address space spanned by the loadable
// segments of a program header table.
//
// elf_phdata: array of program headers.
// num_ph:     number of entries in elf_phdata.
//
// Returns the distance from the page-aligned start address of the first
// PT_LOAD entry to the end (p_vaddr + p_memsz) of the last PT_LOAD entry, in
// table order. Returns 0 if the table has no PT_LOAD entry.
static size_t elf_mapping_size(Elf_Phdr* elf_phdata, size_t num_ph) {
    const Elf_Phdr* first_load = NULL;
    const Elf_Phdr* last_load = NULL;

    for (size_t idx = 0; idx < num_ph; idx++) {
        const Elf_Phdr* ph = &elf_phdata[idx];
        if (ph->p_type == PT_LOAD) {
            if (first_load == NULL)
                first_load = ph;
            last_load = ph;
        }
    }

    if (first_load == NULL)
        return 0;

    uintptr_t lowest = ALIGN_DOWN(first_load->p_vaddr, PAGESIZE);
    uintptr_t highest = last_load->p_vaddr + last_load->p_memsz;
    return highest - lowest;
}
69+
70+
static int
71+
elf_map(uintptr_t addr, Elf_Phdr* elf_ppnt, int fd) {
72+
int retval;
73+
74+
int prot = 0;
75+
if (elf_ppnt->p_flags & PF_R)
76+
prot |= PROT_READ;
77+
if (elf_ppnt->p_flags & PF_W)
78+
prot |= PROT_WRITE;
79+
if (elf_ppnt->p_flags & PF_X) {
80+
// Never map code as executable, since the host can't execute it.
81+
}
82+
83+
uintptr_t mapstart = ALIGN_DOWN(addr, PAGESIZE);
84+
uintptr_t mapend = ALIGN_UP(addr + elf_ppnt->p_filesz, PAGESIZE);
85+
uintptr_t dataend = addr + elf_ppnt->p_filesz;
86+
uintptr_t allocend = addr + elf_ppnt->p_memsz;
87+
uintptr_t mapoff = ALIGN_DOWN(elf_ppnt->p_offset, PAGESIZE);
88+
89+
if (mapend > mapstart) {
90+
void* mapret = mmap((void*) mapstart, mapend - mapstart, prot,
91+
MAP_PRIVATE|MAP_FIXED, fd, mapoff);
92+
if (BAD_ADDR(mapret)) {
93+
puts("map (file)");
94+
retval = (int) (uintptr_t) mapret;
95+
goto out;
96+
}
97+
}
98+
99+
if (allocend > dataend)
100+
{
101+
uintptr_t zeropage = ALIGN_UP(dataend, PAGESIZE);
102+
if (allocend < zeropage)
103+
zeropage = allocend;
104+
105+
if (zeropage > dataend)
106+
{
107+
// We have data at the last page of the segment that has to be
108+
// zeroed. If necessary, we have to give write privileges
109+
// temporarily.
110+
if ((prot & PROT_WRITE) == 0) {
111+
puts("zero (page end)");
112+
retval = mprotect((void*) ALIGN_DOWN(dataend, PAGESIZE),
113+
PAGESIZE, prot | PROT_WRITE);
114+
if (retval < 0)
115+
goto out;
116+
}
117+
memset((void*) dataend, 0, zeropage - dataend);
118+
if ((prot & PROT_WRITE) == 0) {
119+
mprotect((void*) ALIGN_DOWN(dataend, PAGESIZE), PAGESIZE,
120+
prot);
121+
}
122+
}
123+
124+
// We have entire pages that have to be zeroed.
125+
if (allocend > zeropage) {
126+
void* mapret = mmap((void*) zeropage, allocend - zeropage, prot,
127+
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
128+
if (BAD_ADDR(mapret)) {
129+
puts("map (zero)");
130+
retval = (int) (uintptr_t) mapret;
131+
goto out;
132+
}
133+
}
134+
}
135+
136+
retval = 0;
137+
138+
out:
139+
return retval;
140+
}
141+
142+
// static int
143+
// load_elf_interp(const char* filename)
144+
145+
// Load an ELF executable into the current address space.
//
// filename: path of the ELF file; opened read-only and closed before return.
// out_info: if non-NULL, filled on success with the entry point (load bias +
//           e_entry), the in-memory address of the program header table
//           (load address + e_phoff), the number of program headers and the
//           size of one header.
//
// Only ET_EXEC and ET_DYN images for x86-64 without a PT_INTERP entry are
// accepted. For ET_DYN images a free address range large enough for all
// PT_LOAD segments is chosen by the kernel.
//
// Returns 0 on success. On failure returns a negative value: the result of
// the failing open/read_full/mmap/elf_map call, or -ENOEXEC if the file is
// not a supported ELF image or has no loadable segment.
int load_elf_binary(const char* filename, BinaryInfo* out_info) {
    int retval;
    int i;
    Elf_Phdr* elf_ppnt;

    int fd = open(filename, O_RDONLY, 0);
    if (fd < 0) {
        retval = fd;
        goto out;
    }

    // NOTE(review): a short read of the header is only caught here if
    // read_full reports it as a negative value -- confirm read_full's contract.
    Elf_Ehdr elfhdr_ex;
    retval = read_full(fd, &elfhdr_ex, sizeof(Elf_Ehdr));
    if (retval < 0)
        goto out_close;

    // Every validation failure from here on is reported as -ENOEXEC.
    retval = -ENOEXEC;
    if (memcmp(&elfhdr_ex, ELFMAG, SELFMAG) != 0)
        goto out_close;

    if (elfhdr_ex.e_type != ET_EXEC && elfhdr_ex.e_type != ET_DYN)
        goto out_close;

    if (!elf_check_arch(&elfhdr_ex))
        goto out_close;

    Elf_Phdr* elf_phdata = load_elf_phdrs(&elfhdr_ex, fd);
    if (elf_phdata == NULL) {
        puts("Could not load phdata");
        goto out_close;
    }

    for (i = 0, elf_ppnt = elf_phdata; i < elfhdr_ex.e_phnum; i++, elf_ppnt++) {
        if (elf_ppnt->p_type == PT_INTERP) {
            // TODO: Support ELF interpreters
            puts("INTERP must not be set");
            goto out_free_ph;
        }
    }

    uintptr_t load_addr = 0;
    unsigned load_addr_set = 0;
    uintptr_t load_bias = 0;

    // TODO: Support GNU_STACK and architecture specific program headers
    // TODO: Support executable stack

    for (i = 0, elf_ppnt = elf_phdata; i < elfhdr_ex.e_phnum; i++, elf_ppnt++) {
        if (elf_ppnt->p_type != PT_LOAD)
            continue;

        if (elfhdr_ex.e_type == ET_DYN && !load_addr_set) {
            // TODO: handle the case where we have an ELF interpreter.
            // Get a memory region that is large enough to hold the whole binary
            uintptr_t total_size = elf_mapping_size(elf_phdata, elfhdr_ex.e_phnum);
            if (total_size == 0) {
                retval = -ENOEXEC;
                goto out_free_ph;
            }

            void* load_bias_ptr = mmap(NULL, total_size, PROT_NONE,
                                       MAP_PRIVATE|MAP_NORESERVE|MAP_ANONYMOUS,
                                       -1, 0);
            if (BAD_ADDR(load_bias_ptr)) {
                retval = (int) (uintptr_t) load_bias_ptr;
                goto out_free_ph;
            }
            // The reservation is only used to find a free range; the
            // segments are mapped into it with MAP_FIXED below.
            munmap(load_bias_ptr, total_size);

            load_bias = (uintptr_t) load_bias_ptr;
            // Make the first segment's p_vaddr land at the reserved address.
            load_bias = ALIGN_DOWN(load_bias - elf_ppnt->p_vaddr, PAGESIZE);
        }

        if (!load_addr_set) {
            load_addr_set = 1;
            // Address corresponding to file offset 0, derived from the
            // first loadable segment.
            load_addr = (elf_ppnt->p_vaddr-elf_ppnt->p_offset) + load_bias;
        }

        retval = elf_map(load_bias + elf_ppnt->p_vaddr, elf_ppnt, fd);
        if (retval < 0)
            goto out_free_ph;
    }

    // Without any PT_LOAD segment nothing has been mapped, so the entry
    // point and program header address would refer to memory this loader
    // never set up. Reject such files instead of reporting success.
    if (!load_addr_set) {
        retval = -ENOEXEC;
        goto out_free_ph;
    }

    if (out_info != NULL) {
        out_info->entry = (void*) (load_bias + elfhdr_ex.e_entry);
        out_info->phdr = (Elf_Phdr*) (load_addr + elfhdr_ex.e_phoff);
        out_info->phnum = elfhdr_ex.e_phnum;
        out_info->phent = elfhdr_ex.e_phentsize;
    }

    retval = 0;

out_free_ph:
    // load_elf_phdrs always maps exactly one page for the header table.
    munmap(elf_phdata, PAGESIZE);

out_close:
    close(fd);

out:
    return retval;
}

client/elf-loader.h

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
// Guard name avoids a leading underscore followed by an uppercase letter,
// which is reserved for the implementation.
#ifndef INSTREW_ELF_LOADER_H
#define INSTREW_ELF_LOADER_H

#include <elf.h>
#include <stddef.h>


// Width-neutral names for the ELF structures used by the loader; they map to
// the 64-bit variants.
#define Elf_Ehdr Elf64_Ehdr
#define Elf_Phdr Elf64_Phdr
#define Elf_Shdr Elf64_Shdr
// NOTE(review): Elf64_Note must be provided by the <elf.h> in use -- confirm,
// some C libraries only declare Elf64_Nhdr.
#define Elf_Note Elf64_Note

// Description of a loaded binary, filled in by load_elf_binary on success.
struct BinaryInfo {
    // Entry point: e_entry with the load bias applied.
    void* entry;
    // In-memory address of the program header table (load address + e_phoff).
    Elf_Phdr* phdr;
    // Number of program headers (e_phnum).
    size_t phnum;
    // Size of one program header entry (e_phentsize).
    size_t phent;
};

typedef struct BinaryInfo BinaryInfo;

// Load the ELF executable at filename into the current address space.
// If out_info is non-NULL it is filled in on success.
// Returns 0 on success and a negative value on failure.
int load_elf_binary(const char* filename, BinaryInfo* out_info);

#endif

0 commit comments

Comments
 (0)