Skip to content

Commit

Permalink
merge script to summarize the System Calls and File Descriptors for e…
Browse files Browse the repository at this point in the history
…ach benchmark (#51)

* sys_summary

* integrate sys summary into table generation pipeline

---------

Co-authored-by: Zhuoxuan Zhang <hzhang6@oberlin.edu>
  • Loading branch information
Zhuoxuan-Zhang and Zhuoxuan Zhang authored Jan 13, 2025
1 parent 312e993 commit 4ba35f3
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 1 deletion.
10 changes: 9 additions & 1 deletion infrastructure/colossal_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# Input files consumed by this script, all resolved relative to `root`.
data_path = root / 'infrastructure/target/dynamic_analysis.jsonl'
input_size_path = root / 'infrastructure/data/size_inputs.jsonl'
loc_data_path = root / 'infrastructure/target/lines_of_code.csv'
# CSV with one row per benchmark; read_sys_results() expects the columns
# 'Benchmark', 'Sys Calls' and 'File Descriptors'.
csv_file_path = root / 'infrastructure/data/sys_summary.csv'

def read_sys_results():
    """Load the per-benchmark system-call / file-descriptor summary.

    Reads the CSV at ``csv_file_path`` and renames its human-readable
    headers to the snake_case names used by the other tables, so the
    result can be merged on the ``benchmark`` column.

    Returns:
        A DataFrame in which the columns ``Benchmark``, ``Sys Calls`` and
        ``File Descriptors`` are renamed to ``benchmark``, ``sys_calls``
        and ``file_descriptors``.
    """
    column_names = {
        'Benchmark': 'benchmark',
        'Sys Calls': 'sys_calls',
        'File Descriptors': 'file_descriptors',
    }
    return pd.read_csv(csv_file_path).rename(columns=column_names)

benchmark_category_style = {
'bio': ('XXX', 'XXX', 'XXX'),
Expand Down Expand Up @@ -145,6 +151,7 @@ def main():
syntax_script_all_cmds, syntax_bench_all_cmds = stx.read_data(False)
dyn_script, dyn_bench = dyn.read_data()
loc_data_script, loc_data_bench = read_loc_data()
sys_results = read_sys_results()

syntax_script_all_cmds['unique_cmds'] = syntax_script_all_cmds['nodes'].apply(count_unique_cmds)
syntax_bench_all_cmds['unique_cmds'] = syntax_bench_all_cmds['nodes'].apply(count_unique_cmds)
Expand All @@ -165,7 +172,8 @@ def main():

big_bench = syntax_bench.merge(dyn_bench, on='benchmark')\
.merge(loc_data_bench, on='benchmark')\
.merge(syntax_bench_all_cmds[['benchmark', 'unique_cmds']], on='benchmark')
.merge(syntax_bench_all_cmds[['benchmark', 'unique_cmds']], on='benchmark')\
.merge(sys_results, on='benchmark')

big_script = syntax_script.merge(dyn_script, on='script')\
.merge(loc_data_script, on='script')\
Expand Down
14 changes: 14 additions & 0 deletions infrastructure/data/sys_summary.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Benchmark,Sys Calls,File Descriptors
aurpkg,207,0
covid-mts,252,0
file-enc,246,13
log-analysis,225,13
makeself,1237,13
max-temp,202,0
media-conv,225,13
nlp,2054,15
oneliners,677,13
riker,254,13
unix50,1556,0
vps-audit,207,13
vps-audit-negate,207,13
75 changes: 75 additions & 0 deletions infrastructure/sys_summary.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/bash

# CSV file that receives the header plus one result row per benchmark.
output_file="benchmark_results.csv"

# Benchmarks to measure. Each name is also the directory the loop below
# enters before invoking ./run.sh.
benchmarks=(
    aurpkg bio covid-mts file-enc
    log-analysis makeself max-temp media-conv
    nlp oneliners riker sklearn
    unix50 vps-audit vps-audit-negate web-index
)

# Error handler: print the given message to stderr, prefixed with "Error: ",
# then terminate the script with exit status 1.
#   $1 - message to report
error() {
    printf 'Error: %s\n' "$1" >&2
    exit 1
}

# Resolve the results file to an absolute path once. The loop changes into
# each benchmark directory, so a relative path would point at a different
# file depending on the current directory (previously the "strace failed"
# row was written inside the benchmark directory and lost).
case "$output_file" in
    /*) results_path="$output_file" ;;
    *)  results_path="$PWD/$output_file" ;;
esac

echo "Benchmark,Sys Calls,File Descriptors" > "$results_path"

# For every benchmark: run it once under strace to count system calls, run
# it a second time in the background to snapshot its open files with lsof,
# then append "<benchmark>,<syscalls>,<lsof line count>" to the results file.
for benchmark in "${benchmarks[@]}"; do
    echo "Running benchmark: $benchmark"

    strace_output="/tmp/${benchmark}_strace.txt"
    lsof_output="/tmp/${benchmark}_lsof.txt"

    if ! cd "./$benchmark"; then
        echo "$benchmark [fail]: Directory not found" >> "$results_path"
        continue
    fi

    # First run: strace -c writes its summary table to $strace_output.
    # A non-zero status (from strace or from run.sh) is recorded as a failure.
    strace -c -o "$strace_output" ./run.sh --small || {
        echo "$benchmark [fail]: strace failed" >> "$results_path"
        rm -f "$strace_output"
        cd - > /dev/null
        continue
    }

    # Second run: start the benchmark in the background so lsof can inspect
    # the live process.
    ./run.sh --small &
    pid=$!

    # Give the benchmark a moment to open its files before sampling.
    sleep 1

    if [[ -d /proc/$pid ]]; then
        lsof -p "$pid" > "$lsof_output"
    else
        echo "Warning: Process $pid ended before lsof could capture file descriptors."
        # Leave an empty file so the count below becomes 0.
        > "$lsof_output"
    fi

    wait "$pid"

    # Take the call count from the summary's "total" row only. Matching on
    # the leading "100.00" alone could also pick up a single syscall row that
    # accounts for all of the time and yield a multi-line value.
    total_syscalls=$(awk '$1 == "100.00" && $NF == "total" {print $4}' "$strace_output")
    total_syscalls=${total_syscalls:-0}

    # NOTE(review): this counts every line lsof printed. lsof normally emits a
    # header row and non-descriptor entries (cwd, txt, mem), so the value is
    # likely an over-count of real file descriptors -- confirm this is intended.
    fd_count=$(wc -l < "$lsof_output")
    fd_count=${fd_count:-0}

    echo "$benchmark,$total_syscalls,$fd_count" >> "$results_path"

    rm -f "$strace_output" "$lsof_output"
    cd - > /dev/null
done

echo "Benchmark results saved to $output_file."

0 comments on commit 4ba35f3

Please sign in to comment.