Skip to content

Commit 47d5dfb

Browse files
dependabot[bot]tylertitsworth
and
tylertitsworth
authored
Bump the tensorflow group across 1 directory with 8 updates (#315)
Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: tylertitsworth <tyler.titsworth@intel.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: tylertitsworth <tyler.titsworth@intel.com>
1 parent d952100 commit 47d5dfb

16 files changed

+258
-143
lines changed

tensorflow/Dockerfile

+49-101
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,11 @@ ENV KMP_AFFINITY='granularity=fine,verbose,compact,1,0' \
3333
KMP_BLOCKTIME=1 \
3434
KMP_SETTINGS=1
3535

36-
ARG TF_VERSION
37-
3836
WORKDIR /
3937
COPY requirements.txt .
4038

41-
RUN python -m pip install --no-cache-dir -r requirements.txt
39+
RUN python -m pip install --no-cache-dir -r requirements.txt && \
40+
rm -rf requirements.txt
4241

4342
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/THIRD-PARTY-PROGRAMS.txt /licenses/
4443
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-program-of-intel-extension-for-tensorflow.txt /licenses/
@@ -53,12 +52,13 @@ ENV KMP_AFFINITY='granularity=fine,verbose,compact,1,0' \
5352
ENV PATH /usr/bin:/root/conda/envs/idp/bin:/root/conda/condabin:~/conda/bin/:${PATH}
5453

5554
ENV TF_ENABLE_ONEDNN_OPTS=1
56-
ARG TF_VERSION
5755

5856
WORKDIR /
5957
COPY requirements.txt .
6058

61-
RUN python -m pip install --no-cache-dir -r requirements.txt
59+
RUN conda run -n idp python -m pip install --no-cache-dir -r requirements.txt && \
60+
rm -rf requirements.txt && \
61+
conda clean -y --all
6262

6363
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/THIRD-PARTY-PROGRAMS.txt /licenses/
6464
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-program-of-intel-extension-for-tensorflow.txt /licenses/
@@ -77,37 +77,43 @@ EXPOSE 8888
7777

7878
CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/jupyter --port 8888 --ip 0.0.0.0 --no-browser --allow-root --ServerApp.token= --ServerApp.password= --ServerApp.allow_origin=* --ServerApp.base_url=$NB_PREFIX"]
7979

80-
FROM tf-base-${PACKAGE_OPTION} AS openmpi
80+
FROM tf-base-${PACKAGE_OPTION} AS multinode
8181

8282
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
83+
build-essential \
84+
cmake \
85+
g++ \
86+
gcc \
87+
git \
88+
libgl1-mesa-glx \
89+
libglib2.0-0 \
8390
libopenmpi-dev \
91+
numactl \
8492
openmpi-bin \
85-
openmpi-common
93+
openmpi-common \
94+
python3-dev \
95+
unzip \
96+
virtualenv
8697

87-
WORKDIR /
88-
COPY ompi-requirements.txt .
98+
ENV SIGOPT_PROJECT=.
8999

90-
RUN python -m pip install --no-cache-dir -r ompi-requirements.txt
100+
WORKDIR /
101+
COPY multinode/requirements.txt requirements.txt
91102

92-
FROM openmpi AS horovod
103+
RUN python -m pip install --no-cache-dir -r requirements.txt && \
104+
rm -rf requirements.txt
93105

94-
ENV LD_LIBRARY_PATH /lib64/:/usr/lib64/:/usr/local/lib64
106+
ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib"
95107

96108
RUN apt-get install -y --no-install-recommends --fix-missing \
97-
unzip \
98109
openssh-client \
99110
openssh-server && \
100-
rm /etc/ssh/ssh_host_*_key \
101-
/etc/ssh/ssh_host_*_key.pub
102-
103-
ENV OMPI_ALLOW_RUN_AS_ROOT=1
104-
ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
105-
106-
ENV OMPI_MCA_tl_tcp_if_exclude="lo,docker0"
111+
rm /etc/ssh/ssh_host_*_key \
112+
/etc/ssh/ssh_host_*_key.pub && \
113+
apt-get clean && \
114+
rm -rf /var/lib/apt/lists/*
107115

108-
# Install OpenSSH for MPI to communicate between containers
109-
RUN mkdir -p /var/run/sshd && \
110-
echo 'LoginGraceTime 0' >> /etc/ssh/sshd_config
116+
RUN mkdir -p /var/run/sshd
111117

112118
# Install Horovod
113119
ARG HOROVOD_WITH_TENSORFLOW=1
@@ -116,43 +122,32 @@ ARG HOROVOD_WITHOUT_PYTORCH=1
116122
ARG HOROVOD_WITHOUT_GLOO=1
117123
ARG HOROVOD_WITH_MPI=1
118124

119-
RUN apt-get install -y --no-install-recommends --fix-missing \
120-
build-essential \
121-
cmake \
122-
g++ \
123-
gcc \
124-
git \
125-
libgl1-mesa-glx \
126-
libglib2.0-0 \
127-
python3-dev && \
128-
apt-get clean && \
129-
rm -rf /var/lib/apt/lists/*
130-
131-
WORKDIR /
132-
COPY hvd-requirements.txt .
133-
134-
RUN python -m pip install --no-cache-dir -r hvd-requirements.txt
135-
136-
ENV SIGOPT_PROJECT=.
137-
138-
RUN wget --progress=dot:giga --no-check-certificate https://github.com/intel/neural-compressor/raw/master/docker/third-party-programs-tensorflow.txt -O /licenses/inc-third-party-programs-tensorflow.txt && \
139-
wget --progress=dot:giga --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licenses/INC_LICENSE
125+
ENV LD_LIBRARY_PATH /lib64/:/usr/lib64/:/usr/local/lib64
140126

141-
FROM horovod AS multinode-pip
127+
RUN python -m pip install --no-cache-dir horovod==0.28.1
142128

143-
WORKDIR /
144-
COPY multinode-requirements.txt .
129+
ARG PYTHON_VERSION
145130

146-
RUN python -m pip install --no-cache-dir -r multinode-requirements.txt
131+
COPY multinode/generate_ssh_keys.sh /generate_ssh_keys.sh
147132

148-
FROM horovod AS multinode-idp
133+
# modify generate_ssh_keys to be a helper script
134+
# print how to use helper script on bash startup
135+
# Avoids loop for further execution of the startup file
136+
ARG PACKAGE_OPTION=pip
137+
ARG PYPATH="/usr/local/lib/python${PYTHON_VERSION}/dist-packages"
138+
RUN if [ "${PACKAGE_OPTION}" = "idp" ]; then PYPATH="/opt/conda/envs/idp/lib/python${PYTHON_VERSION}/site-packages"; fi && \
139+
echo "source ${PYPATH}/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.startup && \
140+
cat '/generate_ssh_keys.sh' >> ~/.startup && \
141+
rm -rf /generate_ssh_keys.sh
149142

150-
WORKDIR /
151-
COPY multinode-requirements.txt .
143+
COPY multinode/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
144+
COPY multinode/sshd_config /etc/ssh/sshd_config
145+
COPY multinode/ssh_config /etc/ssh/ssh_config
152146

153-
RUN python -m pip install --no-cache-dir -r multinode-requirements.txt
147+
RUN wget --progress=dot:giga --no-check-certificate https://github.com/intel/neural-compressor/raw/master/docker/third-party-programs-tensorflow.txt -O /licenses/inc-third-party-programs-tensorflow.txt && \
148+
wget --progress=dot:giga --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licenses/INC_LICENSE
154149

155-
FROM ${PYTHON_BASE} AS itex-xpu-base-pip
150+
FROM ${PYTHON_BASE} AS itex-xpu-base
156151

157152
RUN apt-get update && \
158153
apt-get install -y --no-install-recommends --fix-missing \
@@ -219,54 +214,7 @@ ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/maste
219214

220215
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/redist/lib:$LD_LIBRARY_PATH
221216

222-
FROM ${PYTHON_BASE} AS itex-xpu-base-idp
223-
224-
RUN apt-get update && \
225-
apt-get install -y --no-install-recommends --fix-missing \
226-
apt-utils \
227-
build-essential \
228-
clinfo \
229-
git \
230-
gnupg2 \
231-
gpg-agent \
232-
rsync \
233-
unzip \
234-
wget && \
235-
apt-get clean && \
236-
rm -rf /var/lib/apt/lists/*
237-
238-
ARG ICD_VER
239-
ARG LEVEL_ZERO_GPU_VER
240-
ARG LEVEL_ZERO_VER
241-
ARG LEVEL_ZERO_DEV_VER
242-
243-
RUN no_proxy="" NO_PROXY="" wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
244-
gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg
245-
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \
246-
tee /etc/apt/sources.list.d/intel-gpu-jammy.list
247-
248-
RUN no_proxy="" NO_PROXY="" apt-get update && \
249-
apt-get install -y --no-install-recommends --fix-missing \
250-
intel-opencl-icd=${ICD_VER} \
251-
intel-level-zero-gpu=${LEVEL_ZERO_GPU_VER} \
252-
level-zero=${LEVEL_ZERO_VER} \
253-
level-zero-dev=${LEVEL_ZERO_DEV_VER} && \
254-
apt-get clean && \
255-
rm -rf /var/lib/apt/lists/*
256-
257-
ARG ITEX_VER="2.15.0.1"
258-
259-
RUN conda install -n idp -y intel-extension-for-tensorflow=${ITEX_VER}=*xpu* \
260-
-c https://software.repos.intel.com/python/conda
261-
262-
ENV LD_LIBRARY_PATH=/opt/conda/envs/idp/lib:$LD_LIBRARY_PATH
263-
264-
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/THIRD-PARTY-PROGRAMS.txt /licenses/
265-
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-program-of-intel-extension-for-tensorflow.txt /licenses/
266-
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-programs-of-intel-tensorflow.txt /licenses/
267-
ADD https://raw.githubusercontent.com/intel/intel-extension-for-tensorflow/master/third-party-programs/dockerlayer/third-party-programs-of-intel-optimization-for-horovod.txt /licenses/
268-
269-
FROM itex-xpu-base-${PACKAGE_OPTION} AS itex-xpu-jupyter
217+
FROM itex-xpu-base AS itex-xpu-jupyter
270218

271219
WORKDIR /jupyter
272220
COPY jupyter-requirements.txt .

tensorflow/README.md

+100-3
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,16 @@ The images below are built only with CPU optimizations (GPU acceleration support
8585

8686
| Tag(s) | TensorFlow | ITEX | Dockerfile |
8787
| --------------------------- | ----------- | ------------ | --------------- |
88-
| `2.15.0-pip-base`, `latest` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] |
88+
| `2.15.1-pip-base`, `latest` | [v2.15.1] | [v2.15.0.1] | [v0.4.0-Beta] |
89+
| `2.15.0-pip-base` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] |
8990
| `2.14.0-pip-base` | [v2.14.1] | [v2.14.0.1] | [v0.3.4] |
9091
| `2.13-pip-base` | [v2.13.0] | [v2.13.0.0] | [v0.2.3] |
9192

9293
The images below additionally include [Jupyter Notebook](https://jupyter.org/) server:
9394

9495
| Tag(s) | TensorFlow | ITEX | Dockerfile |
9596
| -------------------- | ----------- | ------------- | --------------- |
97+
| `2.15.1-pip-jupyter` | [v2.15.1] | [v2.15.0.1] | [v0.4.0-Beta] |
9698
| `2.15.0-pip-jupyter` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] |
9799
| `2.14.0-pip-jupyter` | [v2.14.1] | [v2.14.0.1] | [v0.3.4] |
98100
| `2.13-pip-jupyter` | [v2.13.0] | [v2.13.0.0] | [v0.2.3] |
@@ -105,7 +107,7 @@ docker run -it --rm \
105107
--net=host \
106108
-v $PWD/workspace:/workspace \
107109
-w /workspace \
108-
intel/intel-extension-for-tensorflow:2.15.0-pip-jupyter
110+
intel/intel-extension-for-tensorflow:2.15.1-pip-jupyter
109111
```
110112

111113
After running the command above, copy the URL (something like `http://127.0.0.1:$PORT/?token=***`) into your browser to access the notebook server.
@@ -116,10 +118,102 @@ The images below additionally include [Horovod]:
116118

117119
| Tag(s) | TensorFlow | ITEX | Horovod | Dockerfile |
118120
| ------------------------------ | --------- | ------------ | --------- | --------------- |
121+
| `2.15.1-pip-multinode` | [v2.15.1] | [v2.15.0.1] | [v0.28.1] | [v0.4.0-Beta] |
119122
| `2.15.0-pip-multinode` | [v2.15.0] | [v2.15.0.0] | [v0.28.1] | [v0.4.0-Beta] |
120123
| `2.14.0-pip-openmpi-multinode` | [v2.14.1] | [v2.14.0.1] | [v0.28.1] | [v0.3.4] |
121124
| `2.13-pip-openmpi-mulitnode` | [v2.13.0] | [v2.13.0.0] | [v0.28.0] | [v0.2.3] |
122125

126+
> [!NOTE]
127+
> Passwordless SSH connection is also enabled in the image, but the container does not contain any SSH ID keys. The user needs to mount those keys at `/root/.ssh/id_rsa` and `/etc/ssh/authorized_keys`.
128+
129+
> [!TIP]
130+
> Before mounting any keys, modify the permissions of those files with `chmod 600 authorized_keys; chmod 600 id_rsa` to grant read access for the default user account.
131+
132+
#### Setup and Run ITEX Multi-Node Container
133+
134+
Some additional assembly is required to utilize this container with OpenSSH. To perform any kind of DDP (Distributed Data Parallel) execution, each container is assigned the role of either launcher or worker:
135+
136+
SSH Server (Worker)
137+
138+
1. *Authorized Keys* : `/etc/ssh/authorized_keys`
139+
140+
SSH Client (Launcher)
141+
142+
1. *Private User Key* : `/root/.ssh/id_rsa`
143+
144+
To add these files correctly please follow the steps described below.
145+
146+
1. Setup ID Keys
147+
148+
You can use the commands provided below to [generate the identity keys](https://www.ssh.com/academy/ssh/keygen#creating-an-ssh-key-pair-for-user-authentication) for OpenSSH.
149+
150+
```bash
151+
ssh-keygen -q -N "" -t rsa -b 4096 -f ./id_rsa
152+
touch authorized_keys
153+
cat id_rsa.pub >> authorized_keys
154+
```
155+
156+
2. Configure the permissions and ownership for all of the files you have created so far
157+
158+
```bash
159+
chmod 600 id_rsa config authorized_keys
160+
chown root:root id_rsa.pub id_rsa config authorized_keys
161+
```
162+
163+
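3. Create a hostfile for Horovod (optional). The file uses the OpenSSH client configuration format, with one `Host` entry per node, and is mounted into the launcher container in step 5.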
164+
165+
```txt
166+
Host host1
167+
HostName <Hostname of host1>
168+
IdentitiesOnly yes
169+
IdentityFile ~/.ssh/id_rsa
170+
Port <SSH Port>
171+
Host host2
172+
HostName <Hostname of host2>
173+
IdentitiesOnly yes
174+
IdentityFile ~/.ssh/id_rsa
175+
Port <SSH Port>
176+
...
177+
```
178+
179+
4. Configure [Horovod] in your python script
180+
181+
```python
182+
import horovod.tensorflow as hvd
183+
184+
hvd.init()
185+
```
186+
187+
5. Now start the workers and execute DDP on the launcher
188+
189+
1. Worker run command:
190+
191+
```bash
192+
docker run -it --rm \
193+
--net=host \
194+
-v $PWD/authorized_keys:/etc/ssh/authorized_keys \
195+
-v $PWD/tests:/workspace/tests \
196+
-w /workspace \
197+
intel/intel-optimized-tensorflow:2.15.1-pip-multinode \
198+
bash -c '/usr/sbin/sshd -D'
199+
```
200+
201+
2. Launcher run command:
202+
203+
```bash
204+
docker run -it --rm \
205+
--net=host \
206+
-v $PWD/id_rsa:/root/.ssh/id_rsa \
207+
-v $PWD/tests:/workspace/tests \
208+
-v $PWD/hostfile:/root/.ssh/config \
209+
-w /workspace \
210+
intel/intel-optimized-tensorflow:2.15.1-pip-multinode \
211+
bash -c 'horovodrun --verbose -np 2 -H host1:1,host2:1 /workspace/tests/tf_base_test.py'
212+
```
213+
214+
> [!NOTE]
215+
> [Intel® MPI] can be configured based on your machine settings. If the above commands do not work for you, see the documentation for how to configure based on your network.
216+
123217
---
124218

125219
The images below are [TensorFlow* Serving] with CPU Optimizations:
@@ -151,14 +245,16 @@ The images below are built only with CPU optimizations (GPU acceleration support
151245

152246
| Tag(s) | TensorFlow | ITEX | Dockerfile |
153247
| --------------------------- | ----------- | ------------ | --------------- |
154-
| `2.15.0-idp-base`, `latest` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] |
248+
| `2.15.1-idp-base` | [v2.15.1] | [v2.15.0.1] | [v0.4.0-Beta] |
249+
| `2.15.0-idp-base` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] |
155250
| `2.14.0-idp-base` | [v2.14.1] | [v2.14.0.1] | [v0.3.4] |
156251
| `2.13-idp-base` | [v2.13.0] | [v2.13.0.0] | [v0.2.3] |
157252

158253
The images below additionally include [Jupyter Notebook](https://jupyter.org/) server:
159254

160255
| Tag(s) | TensorFlow | ITEX | Dockerfile |
161256
| -------------------- | ----------- | ------------- | --------------- |
257+
| `2.15.1-idp-jupyter` | [v2.15.1] | [v2.15.0.1] | [v0.4.0-Beta] |
162258
| `2.15.0-idp-jupyter` | [v2.15.0] | [v2.15.0.0] | [v0.4.0-Beta] |
163259
| `2.14.0-idp-jupyter` | [v2.14.1] | [v2.14.0.1] | [v0.3.4] |
164260
| `2.13-idp-jupyter` | [v2.13.0] | [v2.13.0.0] | [v0.2.3] |
@@ -167,6 +263,7 @@ The images below additionally include [Horovod]:
167263

168264
| Tag(s) | TensorFlow | ITEX | Horovod | Dockerfile |
169265
| ------------------------------ | --------- | ------------ | --------- | --------------- |
266+
| `2.15.1-idp-multinode` | [v2.15.1] | [v2.15.0.1] | [v0.28.1] | [v0.4.0-Beta] |
170267
| `2.15.0-idp-multinode` | [v2.15.0] | [v2.15.0.0] | [v0.28.1] | [v0.4.0-Beta] |
171268
| `2.14.0-idp-openmpi-multinode` | [v2.14.1] | [v2.14.0.1] | [v0.28.1] | [v0.3.4] |
172269
| `2.13-idp-openmpi-mulitnode` | [v2.13.0] | [v2.13.0.0] | [v0.28.0] | [v0.2.3] |

0 commit comments

Comments
 (0)