Skip to content

Commit 30575a0

Browse files
incorporated the suggestions
1 parent 46fa8f2 commit 30575a0

File tree

2 files changed

+18
-17
lines changed

2 files changed

+18
-17
lines changed

multiprocess_functions.py

+17-16
Original file line numberDiff line numberDiff line change
@@ -20,22 +20,26 @@ def get_total_cores():
2020
thread_per_core = int(awk.communicate()[0])
2121
return os.cpu_count()//thread_per_core
2222

23-
def get_file_size(file_path):
24-
"""Gets the size of the file in bytes."""
25-
size = subprocess.check_output(["wc", "-c", file_path])
26-
size = int(size.decode("utf-8").split()[0])
27-
return size
28-
29-
def get_core_list(cores_per_process):
23+
def get_numa_nodes():
    """Return the number of NUMA nodes reported by ``lscpu``.

    Runs ``lscpu`` once and reads the value on its ``NUMA node(s):`` line.

    Returns:
        int: the NUMA node count printed by ``lscpu``.

    Raises:
        FileNotFoundError: if ``lscpu`` is not available on PATH.
        subprocess.CalledProcessError: if ``lscpu`` exits with an error.
        ValueError: if the output has no ``NUMA node(s):`` line or the
            value on that line is not an integer.
    """
    # A single lscpu call parsed in Python replaces the former
    # lscpu | grep | awk pipeline, which left the intermediate pipes open
    # and never waited on the lscpu and grep child processes.
    output = subprocess.check_output(["lscpu"]).decode("utf-8")
    for line in output.splitlines():
        if "NUMA node(s):" in line:
            # Same field the old awk '{print $3}' selected:
            # "NUMA", "node(s):", "<count>".
            return int(line.split()[2])
    raise ValueError("lscpu output does not contain a 'NUMA node(s):' line")
31+
32+
def get_file_size(file_path):
    """Return the size of the file at ``file_path`` in bytes.

    Args:
        file_path: path of the file to measure.

    Returns:
        int: the file size in bytes.

    Raises:
        OSError: if the file does not exist or cannot be accessed.
    """
    # Ask the filesystem directly instead of spawning `wc -c` and parsing
    # its text output; this avoids one subprocess per file and the
    # dependency on an external `wc` binary.
    return os.path.getsize(file_path)
3637

38+
def get_core_list(cores_per_process):
39+
40+
numa_nodes = get_numa_nodes()
3741
core_min_max = []
38-
cores_in_numa = os.cpu_count()
42+
cores_in_numa = get_total_cores()
3943
for i in range(numa_nodes):
4044
lscpu = subprocess.Popen(["lscpu"], stdout=subprocess.PIPE)
4145
grep = subprocess.Popen(["grep", "NUMA node" + str(i) + " CPU(s):"], stdin=lscpu.stdout, stdout=subprocess.PIPE)
@@ -51,7 +55,7 @@ def get_core_list(cores_per_process):
5155
for j in range(cores_in_numa//cores_per_process):
5256
core_list = core_list + list(range(core_min_max[i][0] + j*cores_per_process, core_min_max[i][0] + (j+1)*cores_per_process))
5357
else: # single process case or single socket case
54-
core_list = range(os.cpu_count()//2)
58+
core_list = range(get_total_cores())
5559

5660
return core_list, numa_nodes
5761

@@ -108,12 +112,7 @@ def update_queue(result):
108112

109113
def create_process_list(files, MIN_MEM_PER_PROCESS, MIN_CORES_PER_PROCESS, LOAD_BALANCE_FACTOR):
110114
total_cores = get_total_cores()
111-
#numa_nodes
112-
lscpu = subprocess.Popen(["lscpu"], stdout=subprocess.PIPE)
113-
grep = subprocess.Popen(["grep", "NUMA node(s):"], stdin=lscpu.stdout, stdout=subprocess.PIPE)
114-
awk = subprocess.Popen(["awk", "{print $3}"], stdin=grep.stdout, stdout=subprocess.PIPE)
115-
#Get the output
116-
numa_nodes = int(awk.communicate()[0])
115+
numa_nodes = get_numa_nodes()
117116
cores_per_numa = total_cores//numa_nodes
118117

119118
#sockets
@@ -135,9 +134,10 @@ def create_process_list(files, MIN_MEM_PER_PROCESS, MIN_CORES_PER_PROCESS, LOAD_
135134
print("Memory max {}, Core max {}, Load Balance max {}".format(mm, cm, lbm))
136135
max_p = min(mm, cm, lbm)
137136

138-
# number which is 2^x and less than or equal to max
137+
# largest number which is 2^x and less than or equal to max
139138
max_p = 2**int(math.log2(max_p))
140139

140+
# TODO: Use a linear backoff for max_p when the number of files is small
141141
max_processes_list = []
142142
while max_p > 0:
143143
max_processes_list.append(max_p*numa_nodes)
@@ -155,6 +155,7 @@ def start_process_list(files, max_processes_list, bash_subprocess):
155155
for max_processes in max_processes_list:
156156
os.environ["OMP_NUM_THREADS"] = str(total_cores//max_processes)
157157
print("Number of OMP Threads = {}, for {} instances".format(os.environ.get('OMP_NUM_THREADS'), max_processes))
158+
# TODO: Add a linear backoff condition for when the number of files is small
158159
if len(files) >= max_processes:
159160
returned_files = multiprocessing_run(files, max_processes, bash_subprocess)
160161
print("Following protein files couldn't be processed with {} instances".format(max_processes))

run_multiprocess_pre_multimer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def main(argv):
7474
for i, file in enumerate(files):
7575
files[i] = os.path.join(directory, file)
7676

77-
MIN_MEM_PER_PROCESS=50*1024 # 64 GB
77+
MIN_MEM_PER_PROCESS=60*1024 # 60 GB
7878
MIN_CORES_PER_PROCESS=4
7979
LOAD_BALANCE_FACTOR=1
8080

0 commit comments

Comments
 (0)