Skip to content

Commit 4251480

Browse files
authored
Merge pull request #90 from steve-numeus/master
llvm: ability to specify target cpu and features
2 parents ced9b70 + d67ca88 commit 4251480

File tree

2 files changed

+32
-11
lines changed

2 files changed

+32
-11
lines changed

lleaves/lleaves.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ def compile(
9292
finline=True,
9393
froot_func_name="forest_root",
9494
use_fp64=True,
95+
target_cpu=None,
96+
target_cpu_features=None,
9597
):
9698
"""
9799
Generate the LLVM IR for this model and compile it to ASM.
@@ -117,6 +119,10 @@ def compile(
117119
:param froot_func_name: Name of entry point function in the compiled binary. This is the function to link when
118120
writing a C function wrapper. Defaults to "forest_root".
119121
:param use_fp64: If true, compile the model to use fp64 (double) precision, else use fp32 (float).
122+
:param target_cpu: An optional string specifying the target CPU name to specialize for (defaults to the host's
123+
CPU name).
124+
:param target_cpu_features: An optional string specifying the target CPU features to enable (defaults to the
125+
host's CPU features).
120126
"""
121127
assert fblocksize > 0
122128
assert fcodemodel in ("small", "large")
@@ -137,7 +143,11 @@ def compile(
137143

138144
# keep a reference to the engine to protect it from being garbage-collected
139145
self._execution_engine = compile_module_to_asm(
140-
module, cache, fcodemodel=fcodemodel
146+
module,
147+
cache,
148+
fcodemodel=fcodemodel,
149+
target_cpu=target_cpu,
150+
target_cpu_features=target_cpu_features,
141151
)
142152

143153
# Drops GIL during call, re-acquires it after

lleaves/llvm_binding.py

+21-10
Original file line numberDiff line numberDiff line change
@@ -13,30 +13,41 @@ def _initialize_llvm():
1313
llvm.initialize_native_asmprinter()
1414

1515

16-
def _get_target_machine(fcodemodel="large"):
16+
def _get_target_machine(fcodemodel="large", target_cpu=None, target_cpu_features=None):
1717
target = llvm.Target.from_triple(llvm.get_process_triple())
18-
try:
19-
# LLVM raises if features cannot be detected
20-
features = llvm.get_host_cpu_features().flatten()
21-
except RuntimeError:
22-
features = ""
18+
19+
if target_cpu is None:
20+
target_cpu = llvm.get_host_cpu_name()
21+
22+
if target_cpu_features is None:
23+
try:
24+
# LLVM raises if features cannot be detected
25+
target_cpu_features = llvm.get_host_cpu_features().flatten()
26+
except RuntimeError:
27+
target_cpu_features = ""
2328

2429
# large codemodel is necessary for large, ~1000 tree models.
2530
# for smaller models "default" codemodel would be faster.
2631
target_machine = target.create_target_machine(
27-
cpu=llvm.get_host_cpu_name(),
28-
features=features,
32+
cpu=target_cpu,
33+
features=target_cpu_features,
2934
reloc="pic",
3035
codemodel=fcodemodel,
3136
)
3237
return target_machine
3338

3439

35-
def compile_module_to_asm(module, cache_path=None, fcodemodel="large"):
40+
def compile_module_to_asm(
41+
module,
42+
cache_path=None,
43+
fcodemodel="large",
44+
target_cpu=None,
45+
target_cpu_features=None,
46+
):
3647
_initialize_llvm()
3748

3849
# Create a target machine representing the host
39-
target_machine = _get_target_machine(fcodemodel)
50+
target_machine = _get_target_machine(fcodemodel, target_cpu, target_cpu_features)
4051

4152
# Create execution engine for our module
4253
execution_engine = llvm.create_mcjit_compiler(module, target_machine)

0 commit comments

Comments
 (0)