Skip to content

Commit 46fad6e

Browse files
committed
Sync with 'develop'
Signed-off-by: Abolfazl Shahbazi <abolfazl.shahbazi@intel.com>
2 parents 01839ea + 92af8e1 commit 46fad6e

File tree

567 files changed

+215232
-173777
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

567 files changed

+215232
-173777
lines changed

benchmarks/README.md

+38-39
Large diffs are not rendered by default.

benchmarks/common/base_model_init.py

+56-7
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import glob
2222
import json
2323
import os
24+
import re
2425
import sys
2526
import time
2627

@@ -153,6 +154,16 @@ def run_numactl_multi_instance(self, cmd, replace_unique_output_dir=None):
153154
so that each instance uses a unique output folder.
154155
"""
155156

157+
# Find LD_PRELOAD vars, remove them from the cmd, and save them to add on to the prefix
158+
ld_preload_strs = re.findall(r'\bLD_PRELOAD=\S*', cmd)
159+
ld_preload_prefix = ""
160+
for ld_preload_str in ld_preload_strs:
161+
cmd = cmd.replace(ld_preload_str, "")
162+
ld_preload_prefix += ld_preload_str + " "
163+
164+
# Remove leading/trailing whitespace
165+
cmd = cmd.strip()
166+
156167
if self.args.numa_cores_per_instance != "socket":
157168
# Get the cores list and group them according to the number of cores per instance
158169
cores_per_instance = int(self.args.numa_cores_per_instance)
@@ -208,9 +219,9 @@ def run_numactl_multi_instance(self, cmd, replace_unique_output_dir=None):
208219
if len(core_list) == 0:
209220
continue
210221

211-
prefix = ("OMP_NUM_THREADS={0} "
212-
"numactl --localalloc --physcpubind={1}").format(
213-
len(core_list), ",".join(core_list))
222+
prefix = ("{0}OMP_NUM_THREADS={1} "
223+
"numactl --localalloc --physcpubind={2}").format(
224+
ld_preload_prefix, len(core_list), ",".join(core_list))
214225
instance_logfile = log_filename_format.format("instance" + str(instance_num))
215226

216227
unique_command = cmd
@@ -282,6 +293,7 @@ def get_command_prefix(self, socket_id, numactl=True):
282293
Should be used only for single instance.
283294
"""
284295
command = ""
296+
ld_preload = ""
285297

286298
if not self.args.disable_tcmalloc:
287299
# Try to find the TCMalloc library file
@@ -290,16 +302,53 @@ def get_command_prefix(self, socket_id, numactl=True):
290302
if len(matches) == 0:
291303
matches = glob.glob("/usr/lib64/libtcmalloc.so*")
292304

305+
if len(matches) == 0:
306+
matches = glob.glob("/usr/lib/*/libtcmalloc.so*")
307+
308+
if len(matches) == 0:
309+
matches = glob.glob("/usr/lib64/*/libtcmalloc.so*")
310+
293311
if len(matches) > 0:
294-
command += "LD_PRELOAD={} ".format(matches[0])
312+
ld_preload += "LD_PRELOAD={} ".format(matches[0])
295313
else:
296314
# Unable to find the TCMalloc library file
297-
print("Warning: Unable to find the TCMalloc library file (libtcmalloc.so) in /usr/lib or /usr/lib64, "
298-
"so the LD_PRELOAD environment variable will not be set.")
315+
print("Warning: Unable to find the TCMalloc library file (libtcmalloc.so) in /usr/lib, /usr/lib64, "
316+
"/usr/lib/*, or /usr/lib64/* so the LD_PRELOAD environment variable will not be set.")
299317

300318
num_numas = self.platform_util.num_numa_nodes
301319
if num_numas and socket_id != -1 and numactl and not self.args.numa_cores_per_instance:
302-
command += "numactl --cpunodebind={0} --membind={0} ".format(str(socket_id))
320+
if self.args.num_cores == -1:
321+
# Running on the whole socket
322+
command += "numactl --cpunodebind={0} --membind={0} ".format(
323+
str(socket_id))
324+
else:
325+
# Running on a specific number of cores
326+
first_physical_core = self.platform_util.cpuset_cpus[0][0]
327+
num_sockets = len(self.platform_util.cpuset_cpus.keys())
328+
num_cores_in_socket0 = len(self.platform_util.cpuset_cpus[0])
329+
for i in range(num_sockets):
330+
if num_cores_in_socket0 != len(
331+
self.platform_util.cpuset_cpus[i]):
332+
raise ValueError(
333+
"Error: Identifying logical core id assumes all sockets have same number of cores"
334+
)
335+
first_logical_core = num_cores_in_socket0 * num_sockets
336+
if self.platform_util.num_threads_per_core == 1:
337+
# HT is off
338+
cpus_range = "{0}-{1}".format(
339+
first_physical_core,
340+
first_physical_core + self.args.num_cores - 1)
341+
else:
342+
# HT is on.
343+
cpus_range = "{0}-{1},{2}-{3}".format(
344+
first_physical_core,
345+
first_physical_core + self.args.num_cores - 1,
346+
first_logical_core,
347+
first_logical_core + self.args.num_cores - 1)
348+
command += "numactl -C{0} --membind=0 ".format(cpus_range)
349+
350+
# Add LD_PRELOAD to the front of the command
351+
command = ld_preload + command
303352

304353
return command
305354

benchmarks/common/platform_util.py

+37-30
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def _get_cpuset(self):
249249
with open(cpuset_cpus_file, "r") as f:
250250
cpuset = f.read()
251251

252-
if self.args.verbose:
252+
if hasattr(self.args, "verbose") and self.args.verbose:
253253
print("cpuset.cpus: {}".format(cpuset))
254254
return cpuset
255255

@@ -301,10 +301,16 @@ def linux_init(self):
301301

302302
# Try to get the cpuset.cpus info, since lscpu does not know if the cpuset is limited
303303
cpuset = self._get_cpuset()
304+
304305
if cpuset:
306+
num_cores_arg = -1
307+
if hasattr(self.args, "num_cores"):
308+
num_cores_arg = self.args.num_cores
305309
# If the cpuset is the same as the online_cpus_list, then we are using the whole
306-
# machine, so let's avoid unnecessary complexity and don't bother with the cpuset_cpu list
307-
if (online_cpus_list != "" and online_cpus_list != cpuset) or online_cpus_list == "":
310+
# machine, so let's avoid unnecessary complexity and don't bother with the cpuset_cpu list.
311+
# The cpuset_cpus list will also get populated if the num_cores arg is being specified,
312+
# since this list will be used to create the numactl args in base_model_init.py
313+
if (online_cpus_list != "" and online_cpus_list != cpuset) or online_cpus_list == "" or num_cores_arg != -1:
308314
self.cpuset_cpus = self._get_list_from_string_ranges(cpuset)
309315

310316
# Uses numactl to get the core number for each numa node and adds the cores for each
@@ -316,33 +322,34 @@ def linux_init(self):
316322
cores_per_node = int(num_physical_cores / self.num_numa_nodes)
317323
else:
318324
cores_per_node = self.num_cores_per_socket
319-
320-
if self.num_numa_nodes > 0 and self.args.numa_cores_per_instance is not None:
321-
try:
322-
# Get the list of cores
323-
cpu_array_command = \
324-
"numactl -H | grep 'node [0-9]* cpus:' |" \
325-
"sed 's/.*node [0-9]* cpus: *//' | head -{0} |cut -f1-{1} -d' '".format(
326-
self.num_numa_nodes, int(cores_per_node))
327-
cpu_array = subprocess.Popen(
328-
cpu_array_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stdout.readlines()
329-
330-
for node_cpus in cpu_array:
331-
node_cpus = str(node_cpus).lstrip("b'").replace("\\n'", " ")
332-
self.cpu_core_list.append([x for x in node_cpus.split(" ") if x != ''])
333-
334-
# If we have the cpuset list, cross check that list with our core list and
335-
# remove cores that are not part of the cpuset list
336-
if self.cpuset_cpus is not None:
337-
for socket, core_list in enumerate(self.cpu_core_list):
338-
self.cpu_core_list[socket] = [x for x in core_list if int(x) in self.cpuset_cpus]
339-
340-
if (self.args.verbose):
341-
print("Core list: {}".format(self.cpu_core_list), flush=True)
342-
343-
except Exception as e:
344-
print("Warning: An error occured when getting the list of cores using '{}':\n {}".
345-
format(cpu_array_command, e))
325+
if hasattr(self.args, "numa_cores_per_instance"):
326+
if self.num_numa_nodes > 0 and self.args.numa_cores_per_instance is not None:
327+
try:
328+
# Get the list of cores
329+
cpu_array_command = \
330+
"numactl -H | grep 'node [0-9]* cpus:' |" \
331+
"sed 's/.*node [0-9]* cpus: *//' | head -{0} |cut -f1-{1} -d' '".format(
332+
self.num_numa_nodes, int(cores_per_node))
333+
cpu_array = subprocess.Popen(
334+
cpu_array_command, shell=True, stdout=subprocess.PIPE,
335+
stderr=subprocess.PIPE).stdout.readlines()
336+
337+
for node_cpus in cpu_array:
338+
node_cpus = str(node_cpus).lstrip("b'").replace("\\n'", " ")
339+
self.cpu_core_list.append([x for x in node_cpus.split(" ") if x != ''])
340+
341+
# If we have the cpuset list, cross check that list with our core list and
342+
# remove cores that are not part of the cpuset list
343+
if self.cpuset_cpus is not None:
344+
for socket, core_list in enumerate(self.cpu_core_list):
345+
self.cpu_core_list[socket] = [x for x in core_list if int(x) in self.cpuset_cpus]
346+
347+
if hasattr(self.args, "verbose") and self.args.verbose:
348+
print("Core list: {}".format(self.cpu_core_list), flush=True)
349+
350+
except Exception as e:
351+
print("Warning: An error occured when getting the list of cores using '{}':\n {}".
352+
format(cpu_array_command, e))
346353

347354
if self.cpuset_cpus is not None:
348355
# Reformat the cpuset_cpus list so that it's split up by node

0 commit comments

Comments
 (0)