@@ -33,12 +33,11 @@ ENV KMP_AFFINITY='granularity=fine,verbose,compact,1,0' \
33
33
KMP_BLOCKTIME=1 \
34
34
KMP_SETTINGS=1
35
35
36
- ARG TF_VERSION
37
-
38
36
WORKDIR /
39
37
COPY requirements.txt .
40
38
41
- RUN python -m pip install --no-cache-dir -r requirements.txt
39
# Install the packages listed in the copied requirements.txt without keeping
# pip's download cache, then delete the requirements file from the image root.
+ RUN python -m pip install --no-cache-dir -r requirements.txt && \
40
+ rm -rf requirements.txt
42
41
43
42
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/THIRD-PARTY-PROGRAMS.txt /licenses/
44
43
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-program-of-intel-extension-for-tensorflow.txt /licenses/
@@ -53,12 +52,13 @@ ENV KMP_AFFINITY='granularity=fine,verbose,compact,1,0' \
53
52
# Put /usr/bin and the conda locations ahead of the inherited PATH.
# Written in key=value form; the space-separated "ENV key value" form is deprecated.
ENV PATH=/usr/bin:/root/conda/envs/idp/bin:/root/conda/condabin:~/conda/bin/:${PATH}
54
53
55
54
ENV TF_ENABLE_ONEDNN_OPTS=1
56
- ARG TF_VERSION
57
55
58
56
WORKDIR /
59
57
COPY requirements.txt .
60
58
61
- RUN python -m pip install --no-cache-dir -r requirements.txt
59
# Install the packages listed in requirements.txt with the Python of the "idp"
# conda environment, delete the copied requirements file, and clear conda's
# caches in the same layer.
+ RUN conda run -n idp python -m pip install --no-cache-dir -r requirements.txt && \
60
+ rm -rf requirements.txt && \
61
+ conda clean -y --all
62
62
63
63
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/THIRD-PARTY-PROGRAMS.txt /licenses/
64
64
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-program-of-intel-extension-for-tensorflow.txt /licenses/
@@ -77,37 +77,43 @@ EXPOSE 8888
77
77
78
78
CMD ["bash" , "-c" , "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/jupyter --port 8888 --ip 0.0.0.0 --no-browser --allow-root --ServerApp.token= --ServerApp.password= --ServerApp.allow_origin=* --ServerApp.base_url=$NB_PREFIX" ]
79
79
80
- FROM tf-base-${PACKAGE_OPTION} AS openmpi
80
+ FROM tf-base-${PACKAGE_OPTION} AS multinode
81
81
82
82
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
83
+ build-essential \
84
+ cmake \
85
+ g++ \
86
+ gcc \
87
+ git \
88
+ libgl1-mesa-glx \
89
+ libglib2.0-0 \
83
90
libopenmpi-dev \
91
+ numactl \
84
92
openmpi-bin \
85
- openmpi-common
93
+ openmpi-common \
94
+ python3-dev \
95
+ unzip \
96
+ virtualenv
86
97
87
- WORKDIR /
88
- COPY ompi-requirements.txt .
98
+ ENV SIGOPT_PROJECT=.
89
99
90
- RUN python -m pip install --no-cache-dir -r ompi-requirements.txt
100
+ WORKDIR /
101
+ COPY multinode/requirements.txt requirements.txt
91
102
92
- FROM openmpi AS horovod
103
# Install the multinode requirements (copied to /requirements.txt above) without
# keeping pip's download cache, then delete the requirements file.
+ RUN python -m pip install --no-cache-dir -r requirements.txt && \
104
+ rm -rf requirements.txt
93
105
94
- ENV LD_LIBRARY_PATH /lib64/: /usr/lib64/ :/usr/local/lib64
106
# Library search path: the x86_64 multiarch directory first, then the inherited
# value, then the libfabric and lib directories of oneccl_bindings_for_pytorch.
# The value must contain no whitespace: a space after "=" breaks the key=value
# form, and spaces around ":" would become part of the directory names.
# NOTE(review): ${PYTHON_VERSION} has to be defined before this instruction for
# the paths to resolve; confirm the ARG is in scope at this point in the stage.
+ ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib"
95
107
96
108
RUN apt-get install -y --no-install-recommends --fix-missing \
97
- unzip \
98
109
openssh-client \
99
110
openssh-server && \
100
- rm /etc/ssh/ssh_host_*_key \
101
- /etc/ssh/ssh_host_*_key.pub
102
-
103
- ENV OMPI_ALLOW_RUN_AS_ROOT=1
104
- ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
105
-
106
- ENV OMPI_MCA_tl_tcp_if_exclude="lo,docker0"
111
+ rm /etc/ssh/ssh_host_*_key \
112
+ /etc/ssh/ssh_host_*_key.pub && \
113
+ apt-get clean && \
114
+ rm -rf /var/lib/apt/lists/*
107
115
108
- # Install OpenSSH for MPI to communicate between containers
109
- RUN mkdir -p /var/run/sshd && \
110
- echo 'LoginGraceTime 0' >> /etc/ssh/sshd_config
116
+ RUN mkdir -p /var/run/sshd
111
117
112
118
# Install Horovod
113
119
ARG HOROVOD_WITH_TENSORFLOW=1
@@ -116,43 +122,32 @@ ARG HOROVOD_WITHOUT_PYTORCH=1
116
122
ARG HOROVOD_WITHOUT_GLOO=1
117
123
ARG HOROVOD_WITH_MPI=1
118
124
119
- RUN apt-get install -y --no-install-recommends --fix-missing \
120
- build-essential \
121
- cmake \
122
- g++ \
123
- gcc \
124
- git \
125
- libgl1-mesa-glx \
126
- libglib2.0-0 \
127
- python3-dev && \
128
- apt-get clean && \
129
- rm -rf /var/lib/apt/lists/*
130
-
131
- WORKDIR /
132
- COPY hvd-requirements.txt .
133
-
134
- RUN python -m pip install --no-cache-dir -r hvd-requirements.txt
135
-
136
- ENV SIGOPT_PROJECT=.
137
-
138
- RUN wget --progress=dot:giga --no-check-certificate https://github.com/intel/neural-compressor/raw/master/docker/third-party-programs-tensorflow.txt -O /licenses/inc-third-party-programs-tensorflow.txt && \
139
- wget --progress=dot:giga --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licenses/INC_LICENSE
125
# Library search path used for the Horovod install below.
# Written in key=value form; the space-separated "ENV key value" form is deprecated.
# NOTE(review): this assignment does not include ${LD_LIBRARY_PATH}, so any value
# set earlier in this stage is replaced rather than extended; confirm that is intended.
ENV LD_LIBRARY_PATH=/lib64/:/usr/lib64/:/usr/local/lib64
140
126
141
- FROM horovod AS multinode- pip
127
+ RUN python -m pip install --no-cache-dir horovod==0.28.1
142
128
143
- WORKDIR /
144
- COPY multinode-requirements.txt .
129
+ ARG PYTHON_VERSION
145
130
146
- RUN python -m pip install --no-cache-dir -r multinode-requirements.txt
131
+ COPY multinode/generate_ssh_keys.sh /generate_ssh_keys.sh
147
132
148
- FROM horovod AS multinode-idp
133
+ # modify generate_ssh_keys to be a helper script
134
+ # print how to use helper script on bash startup
135
+ # Avoids loop for further execution of the startup file
136
+ ARG PACKAGE_OPTION=pip
137
+ ARG PYPATH="/usr/local/lib/python${PYTHON_VERSION}/dist-packages"
138
# Build ~/.startup in one layer:
#   1. When PACKAGE_OPTION is "idp", override PYPATH with the idp conda
#      environment's site-packages directory; otherwise PYPATH keeps the value
#      of the ARG declared above.
#   2. Append a line that sources oneccl_bindings_for_pytorch's setvars.sh from PYPATH.
#   3. Append the contents of /generate_ssh_keys.sh.
#   4. Delete /generate_ssh_keys.sh.
# NOTE(review): the conda prefix here is /opt/conda, while the PATH set elsewhere
# in this file references /root/conda; confirm which location the idp image uses.
+ RUN if [ "${PACKAGE_OPTION}" = "idp" ]; then PYPATH="/opt/conda/envs/idp/lib/python${PYTHON_VERSION}/site-packages" ; fi && \
139
+ echo "source ${PYPATH}/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.startup && \
140
+ cat '/generate_ssh_keys.sh' >> ~/.startup && \
141
+ rm -rf /generate_ssh_keys.sh
149
142
150
- WORKDIR /
151
- COPY multinode-requirements.txt .
143
+ COPY multinode/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
144
+ COPY multinode/sshd_config /etc/ssh/sshd_config
145
+ COPY multinode/ssh_config /etc/ssh/ssh_config
152
146
153
- RUN python -m pip install --no-cache-dir -r multinode-requirements.txt
147
# Download the third-party notices and LICENSE file from the
# intel/neural-compressor repository into /licenses.
# NOTE(review): --no-check-certificate turns off TLS certificate verification
# for both downloads; confirm it is still required (e.g. CA certificates
# missing from the image) before keeping it.
+ RUN wget --progress=dot:giga --no-check-certificate https://github.com/intel/neural-compressor/raw/master/docker/third-party-programs-tensorflow.txt -O /licenses/inc-third-party-programs-tensorflow.txt && \
148
+ wget --progress=dot:giga --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licenses/INC_LICENSE
154
149
155
- FROM ${PYTHON_BASE} AS itex-xpu-base-pip
150
+ FROM ${PYTHON_BASE} AS itex-xpu-base
156
151
157
152
RUN apt-get update && \
158
153
apt-get install -y --no-install-recommends --fix-missing \
@@ -219,54 +214,7 @@ ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/maste
219
214
220
215
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/redist/lib:$LD_LIBRARY_PATH
221
216
222
- FROM ${PYTHON_BASE} AS itex-xpu-base-idp
223
-
224
- RUN apt-get update && \
225
- apt-get install -y --no-install-recommends --fix-missing \
226
- apt-utils \
227
- build-essential \
228
- clinfo \
229
- git \
230
- gnupg2 \
231
- gpg-agent \
232
- rsync \
233
- unzip \
234
- wget && \
235
- apt-get clean && \
236
- rm -rf /var/lib/apt/lists/*
237
-
238
- ARG ICD_VER
239
- ARG LEVEL_ZERO_GPU_VER
240
- ARG LEVEL_ZERO_VER
241
- ARG LEVEL_ZERO_DEV_VER
242
-
243
- RUN no_proxy="" NO_PROXY="" wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
244
- gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg
245
- RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \
246
- tee /etc/apt/sources.list.d/intel-gpu-jammy.list
247
-
248
- RUN no_proxy="" NO_PROXY="" apt-get update && \
249
- apt-get install -y --no-install-recommends --fix-missing \
250
- intel-opencl-icd=${ICD_VER} \
251
- intel-level-zero-gpu=${LEVEL_ZERO_GPU_VER} \
252
- level-zero=${LEVEL_ZERO_VER} \
253
- level-zero-dev=${LEVEL_ZERO_DEV_VER} && \
254
- apt-get clean && \
255
- rm -rf /var/lib/apt/lists/*
256
-
257
- ARG ITEX_VER="2.15.0.1"
258
-
259
- RUN conda install -n idp -y intel-extension-for-tensorflow=${ITEX_VER}=*xpu* \
260
- -c https://software.repos.intel.com/python/conda
261
-
262
- ENV LD_LIBRARY_PATH=/opt/conda/envs/idp/lib:$LD_LIBRARY_PATH
263
-
264
- ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/THIRD-PARTY-PROGRAMS.txt /licenses/
265
- ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-program-of-intel-extension-for-tensorflow.txt /licenses/
266
- ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-programs-of-intel-tensorflow.txt /licenses/
267
- ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-programs-of-intel-optimization-for-horovod.txt /licenses/
268
-
269
- FROM itex-xpu-base-${PACKAGE_OPTION} AS itex-xpu-jupyter
217
+ FROM itex-xpu-base AS itex-xpu-jupyter
270
218
271
219
WORKDIR /jupyter
272
220
COPY jupyter-requirements.txt .
0 commit comments