Commit 0441e26

Merge pull request #199 from Kaggle/upgrade-cuda-11.8
Upgrade to CUDA 11.7
2 parents: 59a11e3 + 4443985

File tree: 4 files changed, +21 −14 lines

  Dockerfile
  gpu.Dockerfile
  test
  tests/test_keras.R

Dockerfile (+2 −1)

@@ -28,7 +28,8 @@ RUN apt-get update && \
     touch /root/.jupyter/jupyter_nbconvert_config.py && touch /root/.jupyter/migrated && \
     # papermill can replace nbconvert for executing notebooks
     pip install papermill && \
-    pip install jupyterlab-lsp && \
+    # b/276358430 fix Jupyter lsp freezing up the jupyter server
+    pip install jupyterlab-lsp "jupyter-lsp==1.5.1" && \
     /tmp/clean-layer.sh

 # Miniconda
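For context, the added pin keeps the server-side jupyter-lsp package at 1.5.1 alongside the jupyterlab-lsp frontend extension, per the b/276358430 comment. A quick sanity check (a sketch, assuming the CPU image is tagged gcr.io/kaggle-images/rstats:staging, i.e. the BASE_TAG used in gpu.Dockerfile) is to list the resolved versions from the built image:

    # list the jupyter-lsp / jupyterlab-lsp versions baked into the image
    docker run --rm gcr.io/kaggle-images/rstats:staging pip list | grep -i lsp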

gpu.Dockerfile (+10 −9)

@@ -1,5 +1,5 @@
 ARG BASE_TAG=staging
-FROM nvidia/cuda:11.4.2-cudnn8-devel-ubuntu18.04 AS nvidia
+FROM nvidia/cuda:11.7.0-cudnn8-devel-ubuntu18.04 AS nvidia
 FROM gcr.io/kaggle-images/rstats:${BASE_TAG}
 ARG ncpus=1

@@ -10,11 +10,12 @@ COPY --from=nvidia /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/
 COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg

 ENV CUDA_MAJOR_VERSION=11
-ENV CUDA_MINOR_VERSION=4
-ENV CUDA_PATCH_VERSION=2
+ENV CUDA_MINOR_VERSION=7
+ENV CUDA_PATCH_VERSION=0
 ENV CUDA_VERSION=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION.$CUDA_PATCH_VERSION
 ENV CUDA_PKG_VERSION=$CUDA_MAJOR_VERSION-$CUDA_MINOR_VERSION
-ENV CUDNN_VERSION=8.2.4.15
+ENV CUDNN_VERSION=8.5.0.96
+ENV NCCL_VERSION=2.13.4-1
 LABEL com.nvidia.volumes.needed="nvidia_driver"
 LABEL com.nvidia.cuda.version="${CUDA_VERSION}"
 LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}"

@@ -41,8 +42,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libcudnn8-dev=$CUDNN_VERSION-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
     libcublas-$CUDA_PKG_VERSION \
     libcublas-dev-$CUDA_PKG_VERSION \
-    libnccl2=2.11.4-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
-    libnccl-dev=2.11.4-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
+    libnccl2=$NCCL_VERSION+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
+    libnccl-dev=$NCCL_VERSION+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
     /tmp/clean-layer.sh

 ENV CUDA_HOME=/usr/local/cuda

@@ -55,7 +56,7 @@ ENV CUDA_HOME=/usr/local/cuda
 ADD ldpaths $R_HOME/etc/ldpaths

 # Install tensorflow with GPU support
-RUN R -e 'keras::install_keras(tensorflow = "2.6-gpu")' && \
+RUN R -e 'keras::install_keras(tensorflow = "gpu")' && \
     rm -rf /tmp/tensorflow_gpu && \
     /tmp/clean-layer.sh

@@ -70,8 +71,8 @@ RUN CPATH=/usr/local/cuda/targets/x86_64-linux/include install2.r --error --ncpu

 # Torch: install the full package upfront otherwise it will be installed on loading the package which doesn't work for kernels
 # without internet (competitions for example). It will detect CUDA and install the proper version.
-# Make Torch think we use CUDA 11.3 (https://github.com/mlverse/torch/issues/807)
-ENV CUDA=11.3
+# Make Torch think we use CUDA 11.7 (https://github.com/mlverse/torch/issues/807)
+ENV CUDA=11.7
 RUN R -e 'install.packages("torch")'
 RUN R -e 'library(torch); install_torch()'
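With the values set above (CUDA 11.7, CUDNN_VERSION=8.5.0.96, NCCL_VERSION=2.13.4-1), the versioned apt pins in the install hunk expand to libcudnn8-dev=8.5.0.96-1+cuda11.7, libnccl2=2.13.4-1+cuda11.7 and libnccl-dev=2.13.4-1+cuda11.7. A minimal sketch for confirming those exact versions exist in NVIDIA's apt repository, assuming it is run inside the nvidia/cuda:11.7.0-cudnn8-devel-ubuntu18.04 base:

    # print every candidate version the CUDA apt repo offers for these packages
    apt-get update && apt-cache madison libcudnn8-dev libnccl2 libnccl-dev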

test (+3 −1)

@@ -76,6 +76,7 @@ mkdir -p /tmp/rstats-build/working
 # Check that Jupyter server can run; if it dies on startup, the `docker kill` command will throw an error
 docker run -d --name=jupyter_test_r --read-only --net=none \
     -e HOME=/tmp \
+    -e NVIDIA_DISABLE_REQUIRE=1 \
     -v $PWD:/input:ro -v /tmp/rstats-build/working:/working \
     -v /tmp/rstats-build/tmp:/tmp -v /tmp/rstats-build/devshm:/dev/shm \
     -w=/working \

@@ -84,7 +85,7 @@ sleep 3
 docker kill jupyter_test_r && docker rm jupyter_test_r

 # Check that papermill is installed in python (b/191304257).
-docker run --rm --name=papermill_test_r --read-only --net=none \
+docker run --rm -e NVIDIA_DISABLE_REQUIRE=1 --name=papermill_test_r --read-only --net=none \
     "$IMAGE_TAG" python -c 'import sys;import papermill as pm; print(pm.__version__)'


@@ -93,6 +94,7 @@ docker run --rm --name=papermill_test_r --read-only --net=none \
 docker run --rm -t --net=none \
     -e HOME=/tmp \
     -e TF_FORCE_GPU_ALLOW_GROWTH=true \
+    -e NVIDIA_DISABLE_REQUIRE=1 \
     -v $PWD:/input:ro -v /tmp/rstats-build/working:/working \
     -v /tmp/rstats-build/tmp:/tmp -v /tmp/rstats-build/devshm:/dev/shm \
     -w=/working \
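Why NVIDIA_DISABLE_REQUIRE=1: the CUDA base image declares a minimum driver requirement through NVIDIA_REQUIRE_CUDA, and the NVIDIA container runtime refuses to start a container whose host driver does not satisfy it; setting NVIDIA_DISABLE_REQUIRE=1 skips that check, presumably so these smoke tests still start on CI hosts whose driver (if any) predates CUDA 11.7. The same flag works for ad-hoc runs of the built image, e.g. (a sketch; $IMAGE_TAG is whatever tag the build produced):

    # start the image without enforcing the cuda>=11.7 driver requirement
    docker run --rm -e NVIDIA_DISABLE_REQUIRE=1 "$IMAGE_TAG" R -e 'R.version.string'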

tests/test_keras.R (+6 −3)

@@ -12,9 +12,11 @@ test_that("model training", {
     layer_dropout(rate=0.4) %>%
     layer_dense(unit=3, activation='softmax')

+  optimizers <- keras::keras$optimizers
+
   model %>% compile(
     loss = 'categorical_crossentropy',
-    optimizer = optimizer_rmsprop(),
+    optimizer = optimizers$RMSprop(),
     metrics = c('accuracy')
   )

@@ -82,7 +84,7 @@ test_that("CNN model training", {
   datagen %>% fit_image_data_generator(train.feature)

   history <- model %>%
-    fit_generator(
+    fit(
       flow_images_from_data(train.feature, train.label, datagen, batch_size = 10),
       steps_per_epoch = nrow(train.feature) / 10,
       epochs = 1)

@@ -108,5 +110,6 @@ test_that("flow_images_from_dataframe", {
     class_mode = NULL,
     target_size = c(224, 224))

-  expect_is(pred, "keras_preprocessing.image.dataframe_iterator.DataFrameIterator")
+  batch <- generator_next(pred, completed = NULL)
+  expect_gt(length(batch), 0)
 })
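These test edits track the newer keras/TensorFlow pulled in by install_keras(tensorflow = "gpu"): fit_generator() is deprecated in favour of fit(), the optimizer is constructed through the underlying Python module (keras::keras$optimizers$RMSprop()) instead of the optimizer_rmsprop() wrapper, and the brittle assertion on a Python class name is replaced by drawing a batch from the iterator and checking it is non-empty. A sketch for re-running just this file against the built image (mount layout mirrors the test script; $IMAGE_TAG is illustrative):

    # execute only the keras tests inside the freshly built container
    docker run --rm -e NVIDIA_DISABLE_REQUIRE=1 \
        -v "$PWD:/input:ro" -w /input "$IMAGE_TAG" \
        Rscript -e 'testthat::test_file("tests/test_keras.R")'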
