From 827a990160b18d1fefc2d655967d7ca51de90641 Mon Sep 17 00:00:00 2001 From: Ethan Uppal <113849268+ethanuppal@users.noreply.github.com> Date: Sat, 11 May 2024 01:56:46 -0400 Subject: [PATCH] Emission works, put tests in test/e2e --- .gitignore | 3 ++ .ocamlinit | 2 +- README.md | 2 +- lib/backend/asm.ml | 14 +++++-- lib/backend/asm_emit.ml | 45 +++++++++++++------- lib/backend/regalloc/regalloc.ml | 9 ++-- lib/core/util.ml | 27 ++++++++++++ lib/ir/basic_block.ml | 2 +- lib/runtime/x86istimb_debug_print.c | 6 --- lib/runtime/x86istmb_debug_print.c | 6 +++ lib/runtime/x86istmb_main.c | 7 ++++ lib/user/driver.ml | 64 +++++++++++++++++++++++++++-- lib/user/driver.mli | 4 ++ source/basic.x86istmb | 4 ++ test/bin/.gitkeep | 0 test/e2e/basic.x86istmb | 4 ++ test/e2e/empty.x86istmb | 3 ++ test/test_e2e.ml | 25 +++++++++++ test/test_util.ml | 13 ++++++ test/test_x86ISTMB.ml | 1 + 20 files changed, 206 insertions(+), 35 deletions(-) delete mode 100644 lib/runtime/x86istimb_debug_print.c create mode 100644 lib/runtime/x86istmb_debug_print.c create mode 100644 lib/runtime/x86istmb_main.c create mode 100644 source/basic.x86istmb create mode 100644 test/bin/.gitkeep create mode 100644 test/e2e/basic.x86istmb create mode 100644 test/e2e/empty.x86istmb create mode 100644 test/test_e2e.ml diff --git a/.gitignore b/.gitignore index 2f7f17f..c500aec 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ test/project_root.ml gitlog.txt docs/html/* !docs/html/.gitkeep +build_dir/ +bin/* +!bin/.gitkeep diff --git a/.ocamlinit b/.ocamlinit index 99da2f5..5b39909 100644 --- a/.ocamlinit +++ b/.ocamlinit @@ -13,4 +13,4 @@ let show_regalloc file = let liveliness = Liveliness.analysis_of cfg in let ordering = InstrOrdering.make cfg in print_endline (Cfg.to_string cfg); - Regalloc.allocate_for cfg liveliness ordering |> Regalloc.VarTbl.to_seq |> List.of_seq \ No newline at end of file + Regalloc.allocate_for cfg liveliness ordering |> Regalloc.Ir.VariableMap.to_seq |> List.of_seq diff --git a/README.md b/README.md index 14bc2fb..eeb6281 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ ![CI Status](https://github.com/ethanuppal/cs3110_compiler/actions/workflows/ci.yaml/badge.svg) > "x86 is simple trust me bro" -> Last updated: 2024-05-10 23:44:05.745284 +> Last updated: 2024-05-11 01:56:28.147911 ``` $ ./main -h diff --git a/lib/backend/asm.ml b/lib/backend/asm.ml index 0c1a112..a25bc3c 100644 --- a/lib/backend/asm.ml +++ b/lib/backend/asm.ml @@ -40,17 +40,22 @@ module Register = struct | R15 -> "r15" let compare = Stdlib.compare + + (** Every register but RBX, RSP, RBP, and R12–R15. *) + let caller_saved = [ RAX; RCX; RDX; RSI; RDI; R8; R9; R10; R11 ] end module Operand = struct type t = | Register of Register.t + | Deref of Register.t * int | Intermediate of int | Label of string | RelativeLabel of string let to_nasm = function | Register reg -> Register.to_nasm reg + | Deref (reg, off) -> Printf.sprintf "[%s + %d]" (Register.to_nasm reg) off | Intermediate int -> string_of_int int | Label label -> label | RelativeLabel rel_label -> "[rel " ^ rel_label ^ "]" @@ -76,10 +81,9 @@ end = struct let to_nasm label = match (label.is_global, label.is_external) with - | false, false -> label.name - | true, false -> "global " ^ label.name ^ "\n" ^ display_indent ^ label.name - | false, true -> - "external " ^ label.name ^ "\n" ^ display_indent ^ label.name + | false, false -> label.name ^ ":" + | true, false -> "global " ^ label.name ^ "\n" ^ label.name ^ ":" + | false, true -> "extern " ^ label.name | _ -> failwith "invalid label" end @@ -159,11 +163,13 @@ end module AssemblyFile : sig type t + val make : unit -> t val add : t -> Section.t -> unit val to_nasm : t -> string end = struct type t = Section.t BatDynArray.t + let make () = BatDynArray.make 16 let add = BatDynArray.add let to_nasm = diff --git a/lib/backend/asm_emit.ml b/lib/backend/asm_emit.ml index 1398c70..3ef738d 100644 --- a/lib/backend/asm_emit.ml +++ b/lib/backend/asm_emit.ml @@ -1,21 +1,29 @@ +let debug_print_symbol = "_x86istmb_debug_print" + let emit_var regalloc var = - Asm.Operand.Register (Ir.VariableMap.find regalloc var) + match Ir.VariableMap.find regalloc var with + | Regalloc.Register reg -> Asm.Operand.Register reg + | Spill i -> Asm.Operand.Deref (RSP, i) let emit_oper regalloc = function | Operand.Variable var -> emit_var regalloc var | Constant int -> Asm.Operand.Intermediate int let emit_call text regalloc name args = + let to_save = Asm.Register.caller_saved in + let to_save = + if List.length to_save mod 2 = 0 then to_save + else List.hd to_save :: to_save + in Asm.Section.add_all text - [ - Push (Register RDI); - Push (Register RDI); - (* double push for 16 byte alignment *) - Mov (Register RDI, List.hd args |> emit_oper regalloc); - Call (Label name); - Pop (Register RDI); - Pop (Register RDI); - ] + (List.map (fun r -> Asm.Instruction.Push (Register r)) to_save + @ [ + (* double push for 16 byte alignment *) + Asm.Instruction.Mov (Register RDI, List.hd args |> emit_oper regalloc); + Call (Label name); + ] + @ (List.map (fun r -> Asm.Instruction.Pop (Register r)) to_save |> List.rev) + ) (** *) let emit_ir text regalloc = function @@ -35,7 +43,7 @@ let emit_ir text regalloc = function ] | Ref _ -> failwith "ref not impl" | Deref _ -> failwith "deref not impl" - | DebugPrint op -> emit_call text regalloc "_x86istimb_debug_print" [ op ] + | DebugPrint op -> emit_call text regalloc debug_print_symbol [ op ] | Call _ -> failwith "TODO" | Return op -> Asm.Section.add text (Mov (Register RAX, emit_oper regalloc op)) @@ -64,8 +72,17 @@ let emit_bb text cfg regalloc bb = let emit_preamble ~text = Asm.Section.add text (Label - (Asm.Label.make ~is_global:false ~is_external:true - "_x86istimb_debug_print")) + (Asm.Label.make ~is_global:false ~is_external:true debug_print_symbol)) let emit_cfg ~text cfg regalloc = - Cfg.blocks_of cfg |> List.iter (emit_bb text cfg regalloc) + Asm.Section.add_all text + [ + Label + (Asm.Label.make ~is_global:true ~is_external:false + ("_x86istmb_" ^ Cfg.name_of cfg)); + Push (Register RBP); + Mov (Register RBP, Register RSP); + ]; + Cfg.blocks_of cfg |> List.iter (emit_bb text cfg regalloc); + Asm.Section.add_all text + [ Mov (Register RSP, Register RBP); Pop (Register RBP); Ret ] diff --git a/lib/backend/regalloc/regalloc.ml b/lib/backend/regalloc/regalloc.ml index e85d799..4be8c49 100644 --- a/lib/backend/regalloc/regalloc.ml +++ b/lib/backend/regalloc/regalloc.ml @@ -108,14 +108,15 @@ let linear_scan (intervals : (Variable.t * interval) list) if compare_instr_id spill_interval.stop interval.stop > 0 then ( (* spill guaranteed to be assigned an actual register *) - let alloc = VarTbl.find assigned_alloc spill_var in + let alloc = Ir.VariableMap.find assigned_alloc spill_var in assert ( match alloc with | Spill _ -> false | _ -> true); - VarTbl.replace assigned_alloc var alloc; - VarTbl.replace assigned_alloc spill_var (Spill (next_spill_loc ())); + Ir.VariableMap.replace assigned_alloc var alloc; + Ir.VariableMap.replace assigned_alloc spill_var + (Spill (next_spill_loc ())); (* this sucks. can we maybe keep active in reverse order? *) BatRefList.Index.remove_at active (BatRefList.length active - 1); @@ -123,7 +124,7 @@ let linear_scan (intervals : (Variable.t * interval) list) (* add_sort is buggy... TODO: new impl *) BatRefList.push active (var, interval); BatRefList.sort ~cmp:compare_pair_end active) - else VarTbl.replace assigned_alloc var (Spill (next_spill_loc ())) + else Ir.VariableMap.replace assigned_alloc var (Spill (next_spill_loc ())) in List.iter diff --git a/lib/core/util.ml b/lib/core/util.ml index e515e68..a304a40 100644 --- a/lib/core/util.ml +++ b/lib/core/util.ml @@ -9,6 +9,33 @@ let ( >> ) f g x = g (f x) exist, the behavior is undefined. *) let read_file filename = BatFile.with_file_in filename BatIO.read_all +(** [write_file filename content] writes [content] to the file at [filename]. If + the file already exists, it is overwritten. *) +let write_file filename content = + BatFile.with_file_out filename (fun oc -> BatIO.write_line oc content) + +(** [get_command_output command] is the standard output of running [command] in + the shell. *) +let get_command_output command = + let ic = Unix.open_process_in command in + let rec read_lines acc = + try + let line = input_line ic in + read_lines (acc ^ line ^ "\n") + with End_of_file -> + close_in ic; + acc + in + read_lines "" + +(** [contains_substring str sub] if and only if [str] contains [sub]. *) +let contains_substring str sub = + let re = Str.regexp_string sub in + try + ignore (Str.search_forward re str 0); + true + with Not_found -> false + (** [(uncurry f) (x, y) = f x y]. *) let uncurry f (x, y) = f x y diff --git a/lib/ir/basic_block.ml b/lib/ir/basic_block.ml index f51b61e..bc43cad 100644 --- a/lib/ir/basic_block.ml +++ b/lib/ir/basic_block.ml @@ -29,7 +29,7 @@ let get_orig_idx bb idx = BatDynArray.get bb.contents idx |> snd let set_ir bb idx ir = BatDynArray.set bb.contents idx (ir, get_orig_idx bb idx) let rem_ir bb idx = BatDynArray.remove_at idx bb.contents let to_list bb = BatDynArray.to_list bb.contents |> List.map fst -let label_for bb = Printf.sprintf ".L_BB%d:" (id_of bb |> Id.int_of) +let label_for bb = Printf.sprintf ".L_BB%d" (id_of bb |> Id.int_of) let equal bb1 bb2 = Id.equal bb1.id bb2.id let hash bb = Id.int_of bb.id |> Int.hash diff --git a/lib/runtime/x86istimb_debug_print.c b/lib/runtime/x86istimb_debug_print.c deleted file mode 100644 index 8c4c23a..0000000 --- a/lib/runtime/x86istimb_debug_print.c +++ /dev/null @@ -1,6 +0,0 @@ -#include -#include - -void x86istimb_debug_print(int64_t value) { - printf("%d\n", value); -} diff --git a/lib/runtime/x86istmb_debug_print.c b/lib/runtime/x86istmb_debug_print.c new file mode 100644 index 0000000..d70d0ae --- /dev/null +++ b/lib/runtime/x86istmb_debug_print.c @@ -0,0 +1,6 @@ +#include +#include + +void x86istmb_debug_print(int64_t value) { + printf("%lld\n", value); +} diff --git a/lib/runtime/x86istmb_main.c b/lib/runtime/x86istmb_main.c new file mode 100644 index 0000000..6a551c5 --- /dev/null +++ b/lib/runtime/x86istmb_main.c @@ -0,0 +1,7 @@ +#include + +int64_t x86istmb_main(void); + +int main() { + return x86istmb_main(); +} diff --git a/lib/user/driver.ml b/lib/user/driver.ml index d71e74b..ae33757 100644 --- a/lib/user/driver.ml +++ b/lib/user/driver.ml @@ -1,3 +1,4 @@ +let runtime_lib_loc = Util.merge_paths [ Project_root.path; "lib/runtime" ] let print_error = Printf.eprintf "error: %s" let print_help prog = @@ -20,8 +21,8 @@ let print_version () = printf "\n"; printf "Written by: %s\n" (String.concat ", " Meta.get.authors) -let compile paths _ = - Printf.printf "assumes [paths] has one file, ignores flags\n"; +let compile paths _ build_dir_loc = + Printf.printf "[DEBUG] assumes [paths] has one file, ignores flags\n"; let source_path = List.hd paths in let source = Util.read_file source_path in try @@ -34,7 +35,62 @@ let compile paths _ = let regalloc = Regalloc.allocate_for main_cfg liveliness_analysis instr_ordering in - Asm_emit.emit section + let text_section = Asm.Section.make "text" 16 in + Asm_emit.emit_preamble ~text:text_section; + Asm_emit.emit_cfg ~text:text_section main_cfg regalloc; + let asm_file = Asm.AssemblyFile.make () in + Asm.AssemblyFile.add asm_file text_section; + let asm_output_path = + BatFilename.(source_path |> basename |> chop_extension) ^ ".nasm" + in + let build_dir = + Util.merge_paths + [ + (match build_dir_loc with + | Some loc -> loc + | None -> "."); + "build_dir"; + ] + in + if Sys.command (Printf.sprintf "mkdir -p %s" build_dir) <> 0 then + failwith "could not create folder build_dir/ in current directory"; + if Sys.command (Printf.sprintf "rm -f %s/*" build_dir) <> 0 then + failwith "could not remove old build_dir/ contents"; + Util.write_file + (Util.merge_paths [ build_dir; asm_output_path ]) + (Asm.AssemblyFile.to_nasm asm_file); + Util.write_file + (Util.merge_paths [ build_dir; "Makefile" ]) + (Printf.sprintf + "build:\n\ + \t@nasm -f macho64 %s -o build.o\n\ + \t@clang build.o %s/* -o a.out\n" + asm_output_path runtime_lib_loc); + let uname = Util.get_command_output "uname" in + let processor = Util.get_command_output "uname -p" in + let cmd_prefix = + if + Util.contains_substring uname "Darwin" + && Util.contains_substring processor "arm" + then "arch -x86_64 " + else "" + in + Util.write_file + (Util.merge_paths [ build_dir; "cmd_prefix.txt" ]) + cmd_prefix; + (* Util.write_file "build_dir/exec_helper" ("#!/bin/sh\n" ^ cmd_prefix ^ + "./a.out\n"); *) + if + Sys.command + (Printf.sprintf "cd %s/ && " build_dir + ^ cmd_prefix ^ "make build 2>/dev/null") + <> 0 + then failwith "compilation failed"; + Printf.printf "==> Wrote build files to %s\n" build_dir; + Printf.printf + "==> You can run the executable with %s, prefixing with Rosetta as \ + appropriate\n" + (Util.merge_paths [ build_dir; "a.out" ]) (* let simulator = Ir_sim.make () in Ir_sim.run simulator main_cfg; print_string (Ir_sim.output_of simulator) *) with Parse_lex.ParserError msg -> print_error (msg ^ "\n") @@ -44,6 +100,6 @@ let main args = match parse.action with | Help -> print_help parse.prog | Version -> print_version () - | Compile { paths; flags } -> compile paths flags + | Compile { paths; flags } -> compile paths flags None | Error { msg } -> Printf.sprintf "%s\nuse %s -h\n" msg parse.prog |> print_error diff --git a/lib/user/driver.mli b/lib/user/driver.mli index 591e3f5..e1180aa 100644 --- a/lib/user/driver.mli +++ b/lib/user/driver.mli @@ -1 +1,5 @@ +(** [main argv] *) val main : string array -> unit + +(** [compile paths flags build_dir_loc] *) +val compile : string list -> Cli.flag list -> string option -> unit diff --git a/source/basic.x86istmb b/source/basic.x86istmb new file mode 100644 index 0000000..76a4fb8 --- /dev/null +++ b/source/basic.x86istmb @@ -0,0 +1,4 @@ +func main() { + print 1 + print 2 +} diff --git a/test/bin/.gitkeep b/test/bin/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/test/e2e/basic.x86istmb b/test/e2e/basic.x86istmb new file mode 100644 index 0000000..76a4fb8 --- /dev/null +++ b/test/e2e/basic.x86istmb @@ -0,0 +1,4 @@ +func main() { + print 1 + print 2 +} diff --git a/test/e2e/empty.x86istmb b/test/e2e/empty.x86istmb new file mode 100644 index 0000000..64f7376 --- /dev/null +++ b/test/e2e/empty.x86istmb @@ -0,0 +1,3 @@ +func main() { + +} diff --git a/test/test_e2e.ml b/test/test_e2e.ml new file mode 100644 index 0000000..5b2a198 --- /dev/null +++ b/test/test_e2e.ml @@ -0,0 +1,25 @@ +open X86ISTMB +open Alcotest + +let e2e_root = Util.merge_paths [ Project_root.path; "test/e2e" ] +let test_bin = Util.merge_paths [ Project_root.path; "test/bin" ] + +let make_e2e_test filename source () = + let expected = Test_snapshots.ir_transform filename source in + Driver.compile [ filename ] [] (Some test_bin); + let actual = + Util.get_command_output + (Util.read_file + (Util.merge_paths [ test_bin; "build_dir/cmd_prefix.txt" ]) + ^ " " + ^ Util.merge_paths [ test_bin; "build_dir/a.out" ]) + in + (check string) "Compiled output should match IR simulator" expected actual + +let test_suite = + ( "lib/backend/asm_emit.ml", + Sys.readdir e2e_root |> Array.to_list + |> List.map (fun filename -> + let path = Util.merge_paths [ e2e_root; filename ] in + test_case filename `Slow (make_e2e_test path (Util.read_file path))) + ) diff --git a/test/test_util.ml b/test/test_util.ml index 364a276..f67c702 100644 --- a/test/test_util.ml +++ b/test/test_util.ml @@ -29,9 +29,22 @@ let test_basename () = (check string) "Last component is extracted" "foo.baz" (Util.basename "bop/bong/birp/bar/foo.baz") +let test_get_command_output () = + (check string) "Standard output should be captured" "Hi\n" + (Util.get_command_output "echo Hi") + +let test_contains_substring () = + (check bool) "Every string contains the empty string" true + (Util.contains_substring "" ""); + (check bool) "Every string contains the empty string" true + (Util.contains_substring "afladslfd" ""); + (check bool) "cat contains a" true (Util.contains_substring "cat" "a") + let test_suite = ( "lib/util.ml", [ test_case "Util.merge_paths" `Quick test_merge_paths; test_case "Util.test_basename" `Quick test_basename; + test_case "Util.get_command_output" `Quick test_get_command_output; + test_case "Util.contains_substring" `Quick test_contains_substring; ] ) diff --git a/test/test_x86ISTMB.ml b/test/test_x86ISTMB.ml index 9324488..818e273 100644 --- a/test/test_x86ISTMB.ml +++ b/test/test_x86ISTMB.ml @@ -56,6 +56,7 @@ let () = Test_snapshots.basic_suite; Test_snapshots.parse_suite; Test_liveliness.test_suite; + Test_e2e.test_suite; Test_passes.test_suite; Test_digraph.test_suite; Test_context.test_suite;