Merge pull request #90 from steve-numeus/master

siboehm · web-flow · commit 425148024e5c · 2024-12-03T09:48:24.000-08:00
llvm: ability to specify target cpu and features
diff --git a/lleaves/lleaves.py b/lleaves/lleaves.py
@@ -92,6 +92,8 @@ def compile(
         finline=True,
         froot_func_name="forest_root",
         use_fp64=True,
+        target_cpu=None,
+        target_cpu_features=None,
     ):
         """
         Generate the LLVM IR for this model and compile it to ASM.
@@ -117,6 +119,10 @@ def compile(
         :param froot_func_name: Name of entry point function in the compiled binary. This is the function to link when
             writing a C function wrapper. Defaults to "forest_root".
         :param use_fp64: If true, compile the model to use fp64 (double) precision, else use fp32 (float).
+        :param target_cpu: An optional string specifying the target CPU name to specialize for (defaults to the host's
+            CPU name).
+        :param target_cpu_features: An optional string specifying the target CPU features to enable (defaults to the
+            host's CPU features).
         """
         assert fblocksize > 0
         assert fcodemodel in ("small", "large")
@@ -137,7 +143,11 @@ def compile(
 
         # keep a reference to the engine to protect it from being garbage-collected
         self._execution_engine = compile_module_to_asm(
-            module, cache, fcodemodel=fcodemodel
+            module,
+            cache,
+            fcodemodel=fcodemodel,
+            target_cpu=target_cpu,
+            target_cpu_features=target_cpu_features,
         )
 
         # Drops GIL during call, re-acquires it after
diff --git a/lleaves/llvm_binding.py b/lleaves/llvm_binding.py
@@ -13,30 +13,41 @@ def _initialize_llvm():
     llvm.initialize_native_asmprinter()
 
 
-def _get_target_machine(fcodemodel="large"):
+def _get_target_machine(fcodemodel="large", target_cpu=None, target_cpu_features=None):
     target = llvm.Target.from_triple(llvm.get_process_triple())
-    try:
-        # LLVM raises if features cannot be detected
-        features = llvm.get_host_cpu_features().flatten()
-    except RuntimeError:
-        features = ""
+
+    if target_cpu is None:
+        target_cpu = llvm.get_host_cpu_name()
+
+    if target_cpu_features is None:
+        try:
+            # LLVM raises if features cannot be detected
+            target_cpu_features = llvm.get_host_cpu_features().flatten()
+        except RuntimeError:
+            target_cpu_features = ""
 
     # large codemodel is necessary for large, ~1000 tree models.
     # for smaller models "default" codemodel would be faster.
     target_machine = target.create_target_machine(
-        cpu=llvm.get_host_cpu_name(),
-        features=features,
+        cpu=target_cpu,
+        features=target_cpu_features,
         reloc="pic",
         codemodel=fcodemodel,
     )
     return target_machine
 
 
-def compile_module_to_asm(module, cache_path=None, fcodemodel="large"):
+def compile_module_to_asm(
+    module,
+    cache_path=None,
+    fcodemodel="large",
+    target_cpu=None,
+    target_cpu_features=None,
+):
     _initialize_llvm()
 
     # Create a target machine representing the host
-    target_machine = _get_target_machine(fcodemodel)
+    target_machine = _get_target_machine(fcodemodel, target_cpu, target_cpu_features)
 
     # Create execution engine for our module
     execution_engine = llvm.create_mcjit_compiler(module, target_machine)