Use Google Benchmark for low-level benchmarks

siboehm · siboehm · commit 3b965a7ca4dc · 2021-11-19T07:04:27.000+01:00
diff --git a/benchmarks/c_bench/CMakeLists.txt b/benchmarks/c_bench/CMakeLists.txt
@@ -4,15 +4,19 @@ project(c_bench)
 set(CMAKE_CXX_STANDARD 11)
 set(MODEL $ENV{LLEAVES_BENCHMARK_MODEL}) # NYC_taxi / airline / mtpl2
 
-add_executable(benchmark c_bench.cpp)
+add_executable(c_bench c_bench.cpp)
 
+# Remove the cached model object file (executed when CMake configures the project).
 file(REMOVE ${MODEL}.o)
+# Regenerate the model object file via gen_binary.py; configuration fails if no Python interpreter is found.
+find_package(Python REQUIRED COMPONENTS Interpreter)
 add_custom_target(
         run ALL
-        COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/gen_binary.py ${MODEL}
+        COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/gen_binary.py ${MODEL}
         BYPRODUCTS ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL}.o
         COMMENT "Compiling model"
 )
-add_dependencies(benchmark run)
+add_dependencies(c_bench run)
+find_package(benchmark REQUIRED)
 
-target_link_libraries(benchmark cnpy ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL}.o)
+target_link_libraries(c_bench PRIVATE cnpy ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL}.o benchmark::benchmark)
diff --git a/benchmarks/c_bench/README.md b/benchmarks/c_bench/README.md
@@ -1,14 +1,16 @@
-# Setting up C++ benchmark suite
+# Setting up Google Benchmark
 
 To specify the model to be benchmarked 
 set the environment variable ``LLEAVES_BENCHMARK_MODEL`` to one of
 `airline`, `NYC_taxi`, `mtpl2`.
 
+To download the data used for benchmarking, run the bash script `benchmarks/data/setup_data.sh`.
+
 ```bash
 mkdir build && cd build
 export LLEAVES_BENCHMARK_MODEL="mtpl2"
 cmake .. && make
-./benchmark
+./c_bench
 ```
 
 There is a script to use [toplev](https://github.com/andikleen/pmu-tools) to generate
diff --git a/benchmarks/c_bench/c_bench.cpp b/benchmarks/c_bench/c_bench.cpp
@@ -1,42 +1,36 @@
 #include "c_bench.h"
-#include "cnpy.h"
+#include <cnpy.h>
+#include <benchmark/benchmark.h>
 #include <algorithm>
 #include <cstdlib>
 #include <iostream>
 
-#define N_REPEAT 20
-
-int main(int argc, char **argv) {
-  (void)argc;
-  (void)argv;
-
+// Benchmarks forest_root() on the .npy dataset named by LLEAVES_BENCHMARK_MODEL.
+// The data is loaded and the output buffer allocated before the benchmark loop starts.
+static void bm_lleaves(benchmark::State& state) {
   char *model_name = std::getenv("LLEAVES_BENCHMARK_MODEL");
-  std::cout << "Running model " << model_name << "\n";
+  if (model_name == nullptr) {
+    // streaming a null char* is undefined behaviour, so report the problem and bail out
+    state.SkipWithError("LLEAVES_BENCHMARK_MODEL is not set");
+    return;
+  }
 
   std::ostringstream model_stream;
   model_stream << "../../data/" << model_name << ".npy";
   std::string model_file = model_stream.str();
   cnpy::NpyArray arr = cnpy::npy_load(model_file);
 
-  std::cout << "Batchsize: " << arr.shape[0] << "\n";
-
   auto *loaded_data = arr.data<double>();
-  ulong n_preds = arr.shape[0] / (ulong)6;
+  ulong n_preds = arr.shape[0];
   auto *out = (double *)(malloc(n_preds * sizeof(double)));
 
-  std::array<double, N_REPEAT> timings{};
-  clock_t start, end;
-  std::cout << "starting...\n";
-  for (size_t i = 0; i < N_REPEAT; ++i) {
-    start = clock();
-    forest_root(loaded_data, out, (int)0, (int)n_preds);
-    end = clock();
-
-    timings[i] = (double)(end - start) / CLOCKS_PER_SEC;
+  for (auto _ : state){
+      // predict over the whole input array
+      forest_root(loaded_data, out, (int)0, (int)n_preds);
   }
-  std::cout << "...ending, took "
-            << std::accumulate(timings.begin(), timings.end(), 0.0) << "\n";
-
-  std::cout << "Runtime: " << *std::min_element(timings.begin(), timings.end())
-            << "\n";
+  // release the output buffer allocated with malloc above
+  free(out);
 }
+
+BENCHMARK(bm_lleaves);
+BENCHMARK_MAIN();
diff --git a/benchmarks/c_bench/gen_binary.py b/benchmarks/c_bench/gen_binary.py
@@ -1,9 +1,29 @@
 import os
+import time
 
 import lleaves
 
 model = os.environ["LLEAVES_BENCHMARK_MODEL"]
+
+# Optional compiler settings, read from the environment and forwarded to compile().
+fcodemodel = os.environ.get("LLEAVES_FCODEMODEL", "large")
+finline = os.environ.get("LLEAVES_FINLINE", "True")
+if finline not in ("True", "False"):
+    raise ValueError(f"LLEAVES_FINLINE must be 'True' or 'False', got {finline!r}")
+# An empty LLEAVES_FBLOCKSIZE results in fblocksize=None being passed to compile().
+fblocksize = os.environ.get("LLEAVES_FBLOCKSIZE", "34")
+
 print(f"Generating {model}.o")
 
-llvm_model = lleaves.Model(model_file=f"../../../tests/models/{model}/model.txt")
-llvm_model.compile(cache=f"../{model}.o")
+llvm_model = lleaves.Model(
+    model_file=f"../../../tests/models/{model}/model.txt",
+)
+# Only the compile() call is timed, not the model loading above.
+start = time.time()
+llvm_model.compile(
+    cache=f"../{model}.o",
+    fblocksize=int(fblocksize) if fblocksize else None,
+    fcodemodel=fcodemodel,
+    finline=finline == "True",
+)
+print(f"Compiling took: {time.time() - start}")
diff --git a/benchmarks/data/gen_npy.py b/benchmarks/data/gen_npy.py
@@ -5,16 +5,13 @@
 from benchmarks.train_NYC_model import feature_enginering
 
 df = pd.read_csv("airline_data_factorized.csv")
-airline_X = df.to_numpy(np.float32)
-with open("airline.npy", "wb") as f:
-    np.save(f, airline_X)
+airline_X = df.to_numpy(np.float64)
+np.save("airline.npy", airline_X)
 
 df = pd.read_parquet("yellow_tripdata_2016-01.parquet", columns=NYC_used_columns)
-NYC_X = feature_enginering().fit_transform(df).astype(np.float32)
-with open("NYC_taxi.npy", "wb") as f:
-    np.save(f, NYC_X)
+NYC_X = feature_enginering().fit_transform(df).astype(np.float64)
+np.save("NYC_taxi.npy", NYC_X)
 
 df = pd.read_parquet("mtpl2.parquet")
-mtpl2_X = df.to_numpy(np.float32)
-with open("mtpl2.npy", "wb") as f:
-    np.save(f, mtpl2_X)
+mtpl2_X = df.to_numpy(np.float64)
+np.save("mtpl2.npy", mtpl2_X)
diff --git a/environment.yml b/environment.yml
@@ -21,6 +21,7 @@ dependencies:
   - sphinxcontrib-apidoc
   - setuptools-scm
   # benchmarks
+  - benchmark
   - treelite
   - compilers
   - onnxruntime