diff --git a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile deleted file mode 100644 index 5ed856259a9..00000000000 --- a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# -# THIS IS A GENERATED DOCKERFILE. -# -# This file was assembled from multiple pieces, whose use is documented -# throughout. Please refer to the TensorFlow dockerfiles documentation -# for more information. 
- -ARG UBUNTU_VERSION=18.04 - -FROM ubuntu:${UBUNTU_VERSION} AS base - -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - curl \ - git \ - libcurl3-dev \ - libfreetype6-dev \ - libhdf5-serial-dev \ - libzmq3-dev \ - pkg-config \ - rsync \ - software-properties-common \ - sudo \ - unzip \ - zip \ - zlib1g-dev \ - openjdk-8-jdk \ - openjdk-8-jre-headless \ - && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ENV CI_BUILD_PYTHON python - -# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version -ARG CACHE_STOP=1 -# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 -ARG CHECKOUT_TF_SRC=0 -# In case of Python 2.7+ we need to add passwd entries for user and group id -RUN chmod a+w /etc/passwd /etc/group -RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true - -# See http://bugs.python.org/issue19846 -ENV LANG C.UTF-8 - -RUN apt-get update && apt-get install -y \ - python3 \ - python3-pip - -RUN python3 -m pip --no-cache-dir install --upgrade \ - pip \ - setuptools - -# Some TF tools expect a "python" binary -RUN ln -s $(which python3) /usr/local/bin/python - -RUN apt-get update && apt-get install -y \ - build-essential \ - curl \ - git \ - wget \ - openjdk-8-jdk \ - python3-dev \ - virtualenv \ - swig - -RUN python3 -m pip --no-cache-dir install \ - Pillow \ - h5py \ - keras_preprocessing \ - matplotlib \ - mock \ - 'numpy<1.19.0' \ - scipy \ - sklearn \ - pandas \ - future \ - portpicker \ - enum34 - -# Install bazel -ARG BAZEL_VERSION=3.1.0 -RUN mkdir /bazel && \ - wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ - wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ - chmod +x /bazel/installer.sh && \ - 
/bazel/installer.sh && \ - rm -f /bazel/installer.sh - -# install libnuma, openssh, wget -RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - libnuma-dev \ - openssh-server \ - openssh-client \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* ) || \ - ( yum -y update && yum -y install \ - numactl-devel \ - openssh-server \ - openssh-clients \ - wget && \ - yum clean all ) || \ - ( echo "Unsupported Linux distribution. Aborting!" && exit 1 ) - -# Install Open MPI -# download realese version from official website as openmpi github master is not always stable -ARG OPENMPI_VERSION=openmpi-4.0.0 -ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz -RUN mkdir /tmp/openmpi && \ - cd /tmp/openmpi && \ - wget ${OPENMPI_DOWNLOAD_URL} && \ - tar zxf ${OPENMPI_VERSION}.tar.gz && \ - cd ${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ - make -j $(nproc) all && \ - make install && \ - ldconfig && \ - rm -rf /tmp/openmpi - -# Create a wrapper for OpenMPI to allow running as root by default -RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \ - echo '#!/bin/bash' > /usr/local/bin/mpirun && \ - echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \ - chmod a+x /usr/local/bin/mpirun - -# Configure OpenMPI to run good defaults: -RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf - -# Install OpenSSH for MPI to communicate between containers -RUN mkdir -p /var/run/sshd - -# Allow OpenSSH to talk to containers without asking for confirmation -RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ - echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ - mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config - -# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 -ARG CHECKOUT_HOROVOD_SRC=0 -RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone 
--recursive https://github.com/uber/horovod.git /horovod_src || true - -COPY bashrc /etc/bash.bashrc -RUN chmod a+rwx /etc/bash.bashrc - -RUN python3 -m pip install --no-cache-dir jupyter matplotlib -# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 -RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 -RUN jupyter serverextension enable --py jupyter_http_over_ws - -RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ -RUN mkdir /.local && chmod a+rwx /.local -RUN apt-get install -y --no-install-recommends wget -# some examples require git to fetch dependencies -RUN apt-get install -y --no-install-recommends git -WORKDIR /tf/tensorflow-tutorials -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/regression.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/save_and_load.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/text_classification.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/text_classification_with_hub.ipynb -COPY readme-for-jupyter.md README.md -RUN apt-get autoremove -y && apt-get remove -y wget -WORKDIR /tf -EXPOSE 8888 - -RUN python3 -m ipykernel.kernelspec - -CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod-jupyter.Dockerfile similarity index 52% rename from 
tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod.Dockerfile rename to tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod-jupyter.Dockerfile index a4a0bee0bc6..34485a528cd 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod-jupyter.Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,11 +19,13 @@ # throughout. Please refer to the TensorFlow dockerfiles documentation # for more information. -ARG UBUNTU_VERSION=18.04 +ARG UBUNTU_VERSION=20.04 FROM ubuntu:${UBUNTU_VERSION} AS base -RUN apt-get update && apt-get install -y --no-install-recommends \ +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ curl \ git \ @@ -50,14 +52,13 @@ ENV CI_BUILD_PYTHON python ARG CACHE_STOP=1 # Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 ARG CHECKOUT_TF_SRC=0 -# In case of Python 2.7+ we need to add passwd entries for user and group id -RUN chmod a+w /etc/passwd /etc/group -RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true # See http://bugs.python.org/issue19846 ENV LANG C.UTF-8 -RUN apt-get update && apt-get install -y \ +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ python3 \ python3-pip @@ -68,78 +69,37 @@ RUN python3 -m pip --no-cache-dir install --upgrade \ # Some TF tools 
expect a "python" binary RUN ln -s $(which python3) /usr/local/bin/python -RUN apt-get update && apt-get install -y \ - build-essential \ - curl \ - git \ - wget \ - openjdk-8-jdk \ - python3-dev \ - virtualenv \ - swig - -RUN python3 -m pip --no-cache-dir install \ - Pillow \ - h5py \ - keras_preprocessing \ - matplotlib \ - mock \ - 'numpy<1.19.0' \ - scipy \ - sklearn \ - pandas \ - future \ - portpicker \ - enum34 +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl # Install bazel ARG BAZEL_VERSION=3.1.0 RUN mkdir /bazel && \ - wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ - wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ - chmod +x /bazel/installer.sh && \ - /bazel/installer.sh && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ rm -f /bazel/installer.sh -# install libnuma, openssh, wget -RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - libnuma-dev \ - openssh-server \ - openssh-client \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* ) || \ - ( yum -y update && yum -y install \ - numactl-devel \ - openssh-server \ - openssh-clients \ - wget && \ - yum clean all ) || \ - ( echo "Unsupported Linux distribution. Aborting!" 
&& exit 1 ) +ARG DEBIAN_FRONTEND="noninteractive" -# Install Open MPI -# download realese version from official website as openmpi github master is not always stable -ARG OPENMPI_VERSION=openmpi-4.0.0 -ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz -RUN mkdir /tmp/openmpi && \ - cd /tmp/openmpi && \ - wget ${OPENMPI_DOWNLOAD_URL} && \ - tar zxf ${OPENMPI_VERSION}.tar.gz && \ - cd ${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ - make -j $(nproc) all && \ - make install && \ - ldconfig && \ - rm -rf /tmp/openmpi +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Create a wrapper for OpenMPI to allow running as root by default -RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \ - echo '#!/bin/bash' > /usr/local/bin/mpirun && \ - echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \ - chmod a+x /usr/local/bin/mpirun +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun # Configure OpenMPI to run good defaults: -RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf # Install OpenSSH for MPI to communicate between containers RUN mkdir -p /var/run/sshd @@ -151,7 +111,22 @@ RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_confi # Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 ARG CHECKOUT_HOROVOD_SRC=0 -RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --recursive https://github.com/uber/horovod.git /horovod_src || true +ARG HOROVOD_BRANCH=master +RUN 
test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true COPY bashrc /etc/bash.bashrc RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..85e271f54f0 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod.Dockerfile @@ -0,0 +1,118 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. 
+# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +ARG 
DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod-jupyter.Dockerfile similarity index 50% rename from tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile rename to tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod-jupyter.Dockerfile index 00c21e287f1..7a46ea0707d 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile +++ 
b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod-jupyter.Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,16 +19,14 @@ # throughout. Please refer to the TensorFlow dockerfiles documentation # for more information. -ARG UBUNTU_VERSION=18.04 +ARG UBUNTU_VERSION=20.04 FROM ubuntu:${UBUNTU_VERSION} as base -RUN apt-get update && apt-get install -y curl - # See http://bugs.python.org/issue19846 ENV LANG C.UTF-8 -RUN apt-get update && apt-get install -y \ +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ python3 \ python3-pip @@ -50,45 +48,26 @@ ARG TF_PACKAGE=tensorflow ARG TF_PACKAGE_VERSION= RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} -# install libnuma, openssh, wget -RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - libnuma-dev \ - openssh-server \ - openssh-client \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* ) || \ - ( yum -y update && yum -y install \ - numactl-devel \ - openssh-server \ - openssh-clients \ - wget && \ - yum clean all ) || \ - ( echo "Unsupported Linux distribution. Aborting!" 
&& exit 1 ) +ARG DEBIAN_FRONTEND="noninteractive" -# Install Open MPI -# download realese version from official website as openmpi github master is not always stable -ARG OPENMPI_VERSION=openmpi-4.0.0 -ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz -RUN mkdir /tmp/openmpi && \ - cd /tmp/openmpi && \ - wget ${OPENMPI_DOWNLOAD_URL} && \ - tar zxf ${OPENMPI_VERSION}.tar.gz && \ - cd ${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ - make -j $(nproc) all && \ - make install && \ - ldconfig && \ - rm -rf /tmp/openmpi +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Create a wrapper for OpenMPI to allow running as root by default -RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \ - echo '#!/bin/bash' > /usr/local/bin/mpirun && \ - echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \ - chmod a+x /usr/local/bin/mpirun +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun # Configure OpenMPI to run good defaults: -RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf # Install OpenSSH for MPI to communicate between containers RUN mkdir -p /var/run/sshd @@ -99,8 +78,26 @@ RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_confi mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config # Install Horovod -ARG HOROVOD_VERSION=0.16.4 -RUN python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION} +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 
+ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + software-properties-common + +RUN add-apt-repository ppa:ubuntu-toolchain-r/test + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + python3-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 500 --slave /usr/bin/g++ g++ /usr/bin/g++-5 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} COPY bashrc /etc/bash.bashrc RUN chmod a+rwx /etc/bash.bashrc @@ -110,20 +107,8 @@ RUN python3 -m pip install --no-cache-dir jupyter matplotlib RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 RUN jupyter serverextension enable --py jupyter_http_over_ws -RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local -RUN apt-get install -y --no-install-recommends wget -# some examples require git to fetch dependencies -RUN apt-get install -y --no-install-recommends git -WORKDIR /tf/tensorflow-tutorials -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/regression.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/save_and_load.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/text_classification.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/text_classification_with_hub.ipynb -COPY 
readme-for-jupyter.md README.md -RUN apt-get autoremove -y && apt-get remove -y wget WORKDIR /tf EXPOSE 8888 diff --git a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod.Dockerfile similarity index 56% rename from tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod.Dockerfile rename to tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod.Dockerfile index bef75f1e495..8fb1ee56930 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod.Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,16 +19,14 @@ # throughout. Please refer to the TensorFlow dockerfiles documentation # for more information. 
-ARG UBUNTU_VERSION=18.04 +ARG UBUNTU_VERSION=20.04 FROM ubuntu:${UBUNTU_VERSION} as base -RUN apt-get update && apt-get install -y curl - # See http://bugs.python.org/issue19846 ENV LANG C.UTF-8 -RUN apt-get update && apt-get install -y \ +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ python3 \ python3-pip @@ -50,45 +48,26 @@ ARG TF_PACKAGE=tensorflow ARG TF_PACKAGE_VERSION= RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} -# install libnuma, openssh, wget -RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - libnuma-dev \ - openssh-server \ - openssh-client \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* ) || \ - ( yum -y update && yum -y install \ - numactl-devel \ - openssh-server \ - openssh-clients \ - wget && \ - yum clean all ) || \ - ( echo "Unsupported Linux distribution. Aborting!" && exit 1 ) +ARG DEBIAN_FRONTEND="noninteractive" -# Install Open MPI -# download realese version from official website as openmpi github master is not always stable -ARG OPENMPI_VERSION=openmpi-4.0.0 -ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz -RUN mkdir /tmp/openmpi && \ - cd /tmp/openmpi && \ - wget ${OPENMPI_DOWNLOAD_URL} && \ - tar zxf ${OPENMPI_VERSION}.tar.gz && \ - cd ${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ - make -j $(nproc) all && \ - make install && \ - ldconfig && \ - rm -rf /tmp/openmpi +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Create a wrapper for OpenMPI to allow running as root by default -RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \ - echo '#!/bin/bash' > /usr/local/bin/mpirun && \ - echo 
'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \ - chmod a+x /usr/local/bin/mpirun +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun # Configure OpenMPI to run good defaults: -RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf # Install OpenSSH for MPI to communicate between containers RUN mkdir -p /var/run/sshd @@ -99,8 +78,26 @@ RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_confi mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config # Install Horovod -ARG HOROVOD_VERSION=0.16.4 -RUN python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION} +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + software-properties-common + +RUN add-apt-repository ppa:ubuntu-toolchain-r/test + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + python3-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 500 --slave /usr/bin/g++ g++ /usr/bin/g++-5 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} COPY bashrc /etc/bash.bashrc RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..34485a528cd --- /dev/null +++ 
b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod-jupyter.Dockerfile @@ -0,0 +1,132 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + 
openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI with good defaults: keep the TCP BTL off the lo and docker0 interfaces +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod.Dockerfile
b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..85e271f54f0 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod.Dockerfile @@ -0,0 +1,118 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +ARG DEBIAN_FRONTEND="noninteractive" + +# Install OpenMPI and the OpenSSH client and server +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ +
openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI with good defaults: keep the TCP BTL off the lo and docker0 interfaces +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..65043d18443 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod-jupyter.Dockerfile @@ -0,0 +1,112 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=18.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. 
+ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +ARG DEBIAN_FRONTEND="noninteractive" + +# Install OpenMPI and the OpenSSH client and server +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI with good defaults: keep the TCP BTL off the lo and docker0 interfaces +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + python3-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 700 --slave /usr/bin/g++ g++ /usr/bin/g++-7 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see 
https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..69efc88cd35 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod.Dockerfile @@ -0,0 +1,98 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=18.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +ARG DEBIAN_FRONTEND="noninteractive" + +# Install OpenMPI and the OpenSSH client and server +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI with good defaults: keep the TCP BTL off the lo and docker0 interfaces +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && 
apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + python3-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 700 --slave /usr/bin/g++ g++ /usr/bin/g++-7 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile index ffc951f3fc3..b1f1edfe36e 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile @@ -57,17 +57,27 @@ RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/t # See http://bugs.python.org/issue19846 ENV LANG C.UTF-8 +ARG PYTHON=python3 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - python3 \ - python3-pip + curl \ + software-properties-common -RUN python3 -m pip --no-cache-dir install --upgrade \ +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | ${PYTHON} +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ pip \ setuptools # Some TF tools expect a "python" binary -RUN ln -s $(which python3) /usr/local/bin/python +RUN PYTHON_BIN="$(readlink -f "$(which ${PYTHON})")" && \ + ln -sf "${PYTHON_BIN}" /usr/local/bin/python && ln -sf "${PYTHON_BIN}" /usr/local/bin/python3 && \ + ln -sf "${PYTHON_BIN}" /usr/bin/python && \ + ln -sf "${PYTHON_BIN}" /usr/bin/python3 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ curl diff --git 
a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..92b8101078c --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod-jupyter.Dockerfile @@ -0,0 +1,142 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | ${PYTHON} +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary; resolve ${PYTHON} once so the links cannot point at each other +RUN PYTHON_BIN="$(readlink -f "$(which ${PYTHON})")" && \ + ln -sf "${PYTHON_BIN}" /usr/local/bin/python && ln -sf "${PYTHON_BIN}" /usr/local/bin/python3 && \ + ln -sf "${PYTHON_BIN}" /usr/bin/python && \ + ln -sf "${PYTHON_BIN}" /usr/bin/python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh 
"https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +ARG DEBIAN_FRONTEND="noninteractive" + +# Install OpenMPI and the OpenSSH client and server +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI with good defaults: keep the TCP BTL off the lo and docker0 interfaces +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension 
enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..72275fce911 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod.Dockerfile @@ -0,0 +1,128 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | ${PYTHON} +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary; resolve ${PYTHON} once so the links cannot point at each other +RUN PYTHON_BIN="$(readlink -f "$(which ${PYTHON})")" && \ + ln -sf "${PYTHON_BIN}" /usr/local/bin/python && ln -sf "${PYTHON_BIN}" /usr/local/bin/python3 && \ + ln -sf "${PYTHON_BIN}" /usr/bin/python && \ + ln -sf "${PYTHON_BIN}" /usr/bin/python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh 
"https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +ARG DEBIAN_FRONTEND="noninteractive" + +# Install OpenMPI and the OpenSSH client and server +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI with good defaults: keep the TCP BTL off the lo and docker0 interfaces +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile index 10ae251d7ae..f8ae3df3f52 100644 --- 
a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile @@ -57,17 +57,27 @@ RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/t # See http://bugs.python.org/issue19846 ENV LANG C.UTF-8 +ARG PYTHON=python3 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - python3 \ - python3-pip + curl \ + software-properties-common -RUN python3 -m pip --no-cache-dir install --upgrade \ +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | ${PYTHON} +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ pip \ setuptools # Some TF tools expect a "python" binary -RUN ln -s $(which python3) /usr/local/bin/python +RUN PYTHON_BIN="$(readlink -f "$(which ${PYTHON})")" && \ + ln -sf "${PYTHON_BIN}" /usr/local/bin/python && ln -sf "${PYTHON_BIN}" /usr/local/bin/python3 && \ + ln -sf "${PYTHON_BIN}" /usr/bin/python && \ + ln -sf "${PYTHON_BIN}" /usr/bin/python3 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ curl diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..09527a82523 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod-jupyter.Dockerfile @@ -0,0 +1,122 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | ${PYTHON} +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary; resolve ${PYTHON} once so the links cannot point at each other +RUN PYTHON_BIN="$(readlink -f "$(which ${PYTHON})")" && \ + ln -sf "${PYTHON_BIN}" /usr/local/bin/python && ln -sf "${PYTHON_BIN}" /usr/local/bin/python3 && \ + ln -sf "${PYTHON_BIN}" /usr/bin/python && \ + ln -sf "${PYTHON_BIN}" /usr/bin/python3 + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. 
+ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +ARG DEBIAN_FRONTEND="noninteractive" + +# Install OpenMPI and the OpenSSH client and server +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI with good defaults: keep the TCP BTL off the lo and docker0 interfaces +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + ${PYTHON}-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see 
https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..a703ed38dcc --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod.Dockerfile @@ -0,0 +1,108 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+
+ARG UBUNTU_VERSION=20.04
+
+FROM ubuntu:${UBUNTU_VERSION} AS base
+
+# See http://bugs.python.org/issue19846
+ENV LANG=C.UTF-8
+ARG PYTHON=python3
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    curl \
+    software-properties-common
+
+RUN add-apt-repository ppa:deadsnakes/ppa
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    ${PYTHON}
+
+RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | ${PYTHON}
+RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary; point python and python3 at ${PYTHON}
+RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \
+    ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \
+    ln -sf $(which ${PYTHON}) /usr/bin/python && \
+    ln -sf $(which ${PYTHON}) /usr/bin/python3
+
+# Options:
+#   tensorflow
+#   tensorflow-gpu
+#   tf-nightly
+#   tf-nightly-gpu
+# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
+# Installs the latest version by default.
+ARG TF_PACKAGE=tensorflow
+ARG TF_PACKAGE_VERSION=
+RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
+
+ARG DEBIAN_FRONTEND="noninteractive"
+
+# Install Open MPI and OpenSSH (client and server)
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    libopenmpi-dev \
+    openmpi-bin \
+    openmpi-common \
+    openssh-client \
+    openssh-server && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
+    echo '#!/bin/bash' > /usr/bin/mpirun && \
+    echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
+    chmod a+x /usr/bin/mpirun
+
+# Configure OpenMPI with good defaults: keep TCP traffic off the lo and docker0 interfaces
+RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
+
+# Create the runtime directory for sshd; MPI uses SSH to communicate between containers
+RUN mkdir -p /var/run/sshd
+
+# Allow OpenSSH to talk to containers without asking for confirmation
+RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
+    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
+    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+# Install Horovod with TensorFlow support only (PyTorch and MXNet disabled)
+ARG HOROVOD_WITHOUT_PYTORCH=1
+ARG HOROVOD_WITHOUT_MXNET=1
+ARG HOROVOD_WITH_TENSORFLOW=1
+ARG HOROVOD_VERSION=
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    build-essential \
+    g++-8 \
+    gcc-8 \
+    ${PYTHON}-dev
+
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
+
+RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}
+
+COPY bashrc /etc/bash.bashrc
+RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/partials/mkl_horovod/devel-horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/mkl_horovod/devel-horovod.partial.Dockerfile
deleted file mode 100644
index dab42914df3..00000000000
--- a/tensorflow/tools/dockerfiles/partials/mkl_horovod/devel-horovod.partial.Dockerfile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1
-ARG CHECKOUT_HOROVOD_SRC=0
-RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --recursive https://github.com/uber/horovod.git /horovod_src || true
diff --git a/tensorflow/tools/dockerfiles/partials/mkl_horovod/horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/mkl_horovod/horovod.partial.Dockerfile
deleted file mode 100644
index 1e1704f89a8..00000000000
--- a/tensorflow/tools/dockerfiles/partials/mkl_horovod/horovod.partial.Dockerfile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Install Horovod
-ARG HOROVOD_VERSION=0.16.4
-RUN python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION}
diff --git a/tensorflow/tools/dockerfiles/partials/mkl_horovod/mpi.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/mkl_horovod/mpi.partial.Dockerfile
deleted file mode 100644
index 67055ab244a..00000000000
--- a/tensorflow/tools/dockerfiles/partials/mkl_horovod/mpi.partial.Dockerfile
+++ /dev/null
@@ -1,47 +0,0 @@
-# install libnuma, openssh, wget
-RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \
-        libnuma-dev \
-        openssh-server \
-        openssh-client \
-        wget && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* ) || \
-    ( yum -y update && yum -y install \
-        numactl-devel \
-        openssh-server \
-        openssh-clients \
-        wget && \
-    yum clean all ) || \
-    ( echo "Unsupported Linux distribution. Aborting!" && exit 1 )
-
-# Install Open MPI
-# download realese version from official website as openmpi github master is not always stable
-ARG OPENMPI_VERSION=openmpi-4.0.0
-ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz
-RUN mkdir /tmp/openmpi && \
-    cd /tmp/openmpi && \
-    wget ${OPENMPI_DOWNLOAD_URL} && \
-    tar zxf ${OPENMPI_VERSION}.tar.gz && \
-    cd ${OPENMPI_VERSION} && \
-    ./configure --enable-orterun-prefix-by-default && \
-    make -j $(nproc) all && \
-    make install && \
-    ldconfig && \
-    rm -rf /tmp/openmpi
-
-# Create a wrapper for OpenMPI to allow running as root by default
-RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
-    echo '#!/bin/bash' > /usr/local/bin/mpirun && \
-    echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
-    chmod a+x /usr/local/bin/mpirun
-
-# Configure OpenMPI to run good defaults:
-RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
-
-# Install OpenSSH for MPI to communicate between containers
-RUN mkdir -p /var/run/sshd
-
-# Allow OpenSSH to talk to containers without asking for confirmation
-RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
-    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
-    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/1604-horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/1604-horovod.partial.Dockerfile
new file mode 100644
index 00000000000..dabe310b306
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/1604-horovod.partial.Dockerfile
@@ -0,0 +1,21 @@
+# Install Horovod with TensorFlow support only (PyTorch and MXNet disabled)
+ARG HOROVOD_WITHOUT_PYTORCH=1
+ARG HOROVOD_WITHOUT_MXNET=1
+ARG HOROVOD_WITH_TENSORFLOW=1
+ARG HOROVOD_VERSION=
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    software-properties-common
+
+RUN add-apt-repository ppa:ubuntu-toolchain-r/test
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    build-essential \
+    g++-8 \
+    gcc-8 \
+    python3-dev
+
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 500 --slave /usr/bin/g++ g++ /usr/bin/g++-5 && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
+
+RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}
diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile
new file mode 100644
index 00000000000..f018c3a2fc5
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile
@@ -0,0 +1,16 @@
+# Install Horovod with TensorFlow support only (PyTorch and MXNet disabled)
+ARG HOROVOD_WITHOUT_PYTORCH=1
+ARG HOROVOD_WITHOUT_MXNET=1
+ARG HOROVOD_WITH_TENSORFLOW=1
+ARG HOROVOD_VERSION=
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    build-essential \
+    g++-8 \
+    gcc-8 \
+    ${PYTHON}-dev
+
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
+
+RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}
diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel-horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel-horovod.partial.Dockerfile
new file mode 100644
index 00000000000..3150c7a108b
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel-horovod.partial.Dockerfile
@@ -0,0 +1,4 @@
+# Check out the HOROVOD_BRANCH branch of the horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1; a failed clone fails the build
+ARG CHECKOUT_HOROVOD_SRC=0
+ARG HOROVOD_BRANCH=master
+RUN if [ "${CHECKOUT_HOROVOD_SRC}" -eq 1 ]; then git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src; fi
diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile
new file mode 100644
index 00000000000..63c1e13443c
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile
@@ -0,0 +1,16 @@
+# Install Horovod with TensorFlow support only (PyTorch and MXNet disabled)
+ARG HOROVOD_WITHOUT_PYTORCH=1
+ARG HOROVOD_WITHOUT_MXNET=1
+ARG HOROVOD_WITH_TENSORFLOW=1
+ARG HOROVOD_VERSION=
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    build-essential \
+    g++-8 \
+    gcc-8 \
+    python3-dev
+
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 700 --slave /usr/bin/g++ g++ /usr/bin/g++-7 && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
+
+RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}
diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/mpi.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/mpi.partial.Dockerfile
new file mode 100644
index 00000000000..cf899900941
--- /dev/null
+++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/mpi.partial.Dockerfile
@@ -0,0 +1,28 @@
+ARG DEBIAN_FRONTEND="noninteractive"
+
+# Install Open MPI and OpenSSH (client and server)
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    libopenmpi-dev \
+    openmpi-bin \
+    openmpi-common \
+    openssh-client \
+    openssh-server && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
+    echo '#!/bin/bash' > /usr/bin/mpirun && \
+    echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
+    chmod a+x /usr/bin/mpirun
+
+# Configure OpenMPI with good defaults: keep TCP traffic off the lo and docker0 interfaces
+RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
+
+# Create the runtime directory for sshd; MPI uses SSH to communicate between containers
+RUN mkdir -p /var/run/sshd
+
+# Allow OpenSSH to talk to containers without asking for confirmation
+RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
+    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
+    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml
index 4b52d0553ba..83829d73346 100644
--- a/tensorflow/tools/dockerfiles/spec.yml
+++ b/tensorflow/tools/dockerfiles/spec.yml
@@ -32,7 +32,6 @@ releases:
         tag_specs:
             - "{nightly}{jupyter}"
             - "{_TAG_PREFIX}{ubuntu-devel}"
-
     # Built per-release and pushed to tensorflow/tensorflow
     # --arg _TAG_PREFIX= should be set to "1.11" (for example) or "latest".
     versioned:
@@ -44,6 +43,10 @@ releases:
             - "{_TAG_PREFIX}{ubuntu-onednn}{onednn-jupyter}"
             - "{_TAG_PREFIX}{ubuntu-devel-onednn}"
             - "{_TAG_PREFIX}{ubuntu-devel-onednn}{onednn-jupyter}"
+            - "{_TAG_PREFIX}{ubuntu-onednn-mpi-horovod}"
+            - "{_TAG_PREFIX}{ubuntu-onednn-mpi-horovod}{onednn-jupyter}"
+            - "{_TAG_PREFIX}{ubuntu-devel-onednn-mpi-horovod}"
+            - "{_TAG_PREFIX}{ubuntu-devel-onednn-mpi-horovod}{onednn-jupyter}"
 
     # Dockerfiles stored in the TF repo; not pushed anywhere
     dockerfiles:
@@ -54,12 +57,14 @@ releases:
             - "{ubuntu-devel}{jupyter}"
             - "{ubuntu-ppc64le}{jupyter}"
             - "{ubuntu-devel-ppc64le}{jupyter}"
-            - "{ubuntu-horovod}{jupyter}"
-            - "{ubuntu-devel-horovod}{jupyter}"
             - "{ubuntu-onednn}"
             - "{ubuntu-onednn}{onednn-jupyter}"
             - "{ubuntu-devel-onednn}"
             - "{ubuntu-devel-onednn}{onednn-jupyter}"
+            - "{ubuntu-onednn-mpi-horovod}"
+            - "{ubuntu-devel-onednn-mpi-horovod}"
+            - "{ubuntu-onednn-mpi-horovod}{onednn-jupyter}"
+            - "{ubuntu-devel-onednn-mpi-horovod}{onednn-jupyter}"
             - "{ubuntu-devel-arm64v8}{jupyter}"
 
 slice_sets:
@@ -142,40 +147,209 @@ slice_sets:
               - UBUNTU_VERSION=18.04
               - CHECKOUT_TF_SRC=1
 
-    ubuntu-horovod:
-        - add_to_name: "-horovod"
-          dockerfile_exclusive_name: "horovod"
-          dockerfile_subdirectory: "mkl_horovod"
+    ubuntu-onednn:
+        - add_to_name: "-16.04-onednn"
+          dockerfile_exclusive_name: "ubuntu-16.04-onednn"
+          dockerfile_subdirectory: "onednn"
           partials:
-              - ubuntu/version
-              - ubuntu/cpu
-              - ubuntu/python
+              - onednn/ubuntu/version
+              - onednn/ubuntu/cpu
+              - onednn/ubuntu/python
               - tensorflow
-              - mkl_horovod/mpi
-              - mkl_horovod/horovod
               - shell
           tests:
-              - import-mkl-horovod.sh
+              - import-onednn.sh
           args:
               - TF_PACKAGE=intel-tensorflow
-
-    ubuntu-devel-horovod:
-        - add_to_name: "devel-horovod"
-          dockerfile_exclusive_name: "devel-horovod"
-          dockerfile_subdirectory: "mkl_horovod"
+              - UBUNTU_VERSION=16.04
+        - add_to_name: "-18.04-onednn"
+          dockerfile_exclusive_name: "ubuntu-18.04-onednn"
+          dockerfile_subdirectory: "onednn"
           partials:
-              - ubuntu/version
-              - ubuntu/devel-cpu
-              - ubuntu/python
-              - ubuntu/bazel
-              - mkl_horovod/mpi
-              - mkl_horovod/devel-horovod
+              - onednn/ubuntu/version
+              - onednn/ubuntu/cpu
+              - onednn/ubuntu/python
+              - tensorflow
               - shell
           tests:
-              - build-mkl-horovod.sh
+              - import-onednn.sh
           args:
+              - TF_PACKAGE=intel-tensorflow
+              - UBUNTU_VERSION=18.04
+        - add_to_name: "-20.04-onednn"
+          dockerfile_exclusive_name: "ubuntu-20.04-onednn"
+          dockerfile_subdirectory: "onednn"
+          partials:
+              - onednn/ubuntu/version
+              - onednn/ubuntu/cpu
+              - onednn/ubuntu/python3
+              - tensorflow
+              - shell
+          tests:
+              - import-onednn.sh
+          args:
+              - TF_PACKAGE=intel-tensorflow
+              - UBUNTU_VERSION=20.04
+              - PYTHON=python3.7
+
+    ubuntu-devel-onednn:
+        - add_to_name: "-16.04-devel-onednn"
+          dockerfile_exclusive_name: "ubuntu-16.04-devel-onednn"
+          dockerfile_subdirectory: "onednn"
+          partials:
+              - onednn/ubuntu/version
+              - onednn/ubuntu/devel
+              - onednn/ubuntu/python
+              - onednn/ubuntu/bazel
+              - shell
+          tests:
+              - ""
+          args:
+              - UBUNTU_VERSION=16.04
+              - CHECKOUT_TF_SRC=1
+              - TF_BRANCH=master
+        - add_to_name: "-18.04-devel-onednn"
+          dockerfile_exclusive_name: "ubuntu-18.04-devel-onednn"
+          dockerfile_subdirectory: "onednn"
+          partials:
+              - onednn/ubuntu/version
+              - onednn/ubuntu/devel
+              - onednn/ubuntu/python
+              - onednn/ubuntu/bazel
+              - shell
+          tests:
+              - ""
+          args:
+              - UBUNTU_VERSION=18.04
+              - CHECKOUT_TF_SRC=1
+              - TF_BRANCH=master
+        - add_to_name: "-20.04-devel-onednn"
+          dockerfile_exclusive_name: "ubuntu-20.04-devel-onednn"
+          dockerfile_subdirectory: "onednn"
+          partials:
+              - onednn/ubuntu/version
+              - onednn/ubuntu/devel
+              - onednn/ubuntu/python3
+              - onednn/ubuntu/bazel
+              - shell
+          tests:
+              - ""
+          args:
+              - UBUNTU_VERSION=20.04
+              - PYTHON=python3.7
+              - CHECKOUT_TF_SRC=1
+              - TF_BRANCH=master
+
+    ubuntu-onednn-mpi-horovod:
+        - add_to_name: "-16.04-onednn-mpi-horovod"
+          dockerfile_exclusive_name: "ubuntu-16.04-onednn-mpi-horovod"
+          dockerfile_subdirectory: "onednn"
+          partials:
+              - onednn/ubuntu/version
+              - onednn/ubuntu/cpu
+              - onednn/ubuntu/python
+              - tensorflow
+              - onednn/ubuntu/mpi
+              - onednn/ubuntu/1604-horovod
+              - shell
+          tests:
+              - import-onednn-horovod.sh
+          args:
+              - UBUNTU_VERSION=16.04
+              - DEBIAN_FRONTEND="noninteractive"
+              - TF_PACKAGE=intel-tensorflow
+        - add_to_name: "-18.04-onednn-mpi-horovod"
+          dockerfile_exclusive_name: "ubuntu-18.04-onednn-mpi-horovod"
+          dockerfile_subdirectory: "onednn"
+          partials:
+              - onednn/ubuntu/version
+              - onednn/ubuntu/cpu
+              - onednn/ubuntu/python
+              - tensorflow
+              - onednn/ubuntu/mpi
+              - onednn/ubuntu/horovod
+              - shell
+          tests:
+              - import-onednn-horovod.sh
+          args:
+              - UBUNTU_VERSION=18.04
+              - DEBIAN_FRONTEND="noninteractive"
+              - TF_PACKAGE=intel-tensorflow
+        - add_to_name: "-20.04-onednn-mpi-horovod"
+          dockerfile_exclusive_name: "ubuntu-20.04-onednn-mpi-horovod"
+          dockerfile_subdirectory: "onednn"
+          partials:
+              - onednn/ubuntu/version
+              - onednn/ubuntu/cpu
+              - onednn/ubuntu/python3
+              - tensorflow
+              - onednn/ubuntu/mpi
+              - onednn/ubuntu/2004-horovod
+              - shell
+          tests:
+              - import-onednn-horovod.sh
+          args:
+              - UBUNTU_VERSION=20.04
+              - PYTHON=python3.7
+              - DEBIAN_FRONTEND="noninteractive"
+              - TF_PACKAGE=intel-tensorflow
+
+    ubuntu-devel-onednn-mpi-horovod:
+        - add_to_name: "-16.04-onednn-devel-mpi-horovod"
+          dockerfile_exclusive_name: "ubuntu-16.04-devel-onednn-mpi-horovod"
+          dockerfile_subdirectory: "onednn"
+          partials:
+              - onednn/ubuntu/version
+              - onednn/ubuntu/devel
+              - onednn/ubuntu/python
+              - onednn/ubuntu/bazel
+              - onednn/ubuntu/mpi
+              - onednn/ubuntu/devel-horovod
+              - shell
+          tests:
+              - ""
+          args:
+              - UBUNTU_VERSION=16.04
               - CHECKOUT_TF_SRC=1
               - CHECKOUT_HOROVOD_SRC=1
+              - HOROVOD_BRANCH=master
+        - add_to_name: "-18.04-onednn-devel-mpi-horovod"
+          dockerfile_exclusive_name: "ubuntu-18.04-devel-onednn-mpi-horovod"
+          dockerfile_subdirectory: "onednn"
+          partials:
+              - onednn/ubuntu/version
+              - onednn/ubuntu/devel
+              - onednn/ubuntu/python
+              - onednn/ubuntu/bazel
+              - onednn/ubuntu/mpi
+              - onednn/ubuntu/devel-horovod
+              - shell
+          tests:
+              - ""
+          args:
+              - UBUNTU_VERSION=18.04
+              - CHECKOUT_TF_SRC=1
+              - CHECKOUT_HOROVOD_SRC=1
+              - HOROVOD_BRANCH=master
+        - add_to_name: "-20.04-onednn-devel-mpi-horovod"
+          dockerfile_exclusive_name: "ubuntu-20.04-devel-onednn-mpi-horovod"
+          dockerfile_subdirectory: "onednn"
+          partials:
+              - onednn/ubuntu/version
+              - onednn/ubuntu/devel
+              - onednn/ubuntu/python3
+              - onednn/ubuntu/bazel
+              - onednn/ubuntu/mpi
+              - onednn/ubuntu/devel-horovod
+              - shell
+          tests:
+              - ""
+          args:
+              - UBUNTU_VERSION=20.04
+              - PYTHON=python3.7
+              - CHECKOUT_TF_SRC=1
+              - CHECKOUT_HOROVOD_SRC=1
+              - HOROVOD_BRANCH=master
 
 
     ubuntu-onednn:
diff --git a/tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh b/tensorflow/tools/dockerfiles/tests/import-onednn-horovod.sh
similarity index 60%
rename from tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh
rename to tensorflow/tools/dockerfiles/tests/import-onednn-horovod.sh
index b1cae48c6ee..18b3b19d69d 100755
--- a/tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh
+++ b/tensorflow/tools/dockerfiles/tests/import-onednn-horovod.sh
@@ -15,4 +15,14 @@
 # limitations under the License.
 # ============================================================================
 
-python -c 'from tensorflow.python import pywrap_tensorflow; pywrap_tensorflow.IsMklEnabled() or exit(1); import horovod.tensorflow as hvd'
+# Pass only if TensorFlow reports MKL/oneDNN support and Horovod imports cleanly.
+# IsMklEnabled() is looked up in _pywrap_util_port first, then in the older
+# pywrap_tensorflow module; each python call exits non-zero on failure.
+if python -c 'from tensorflow.python import _pywrap_util_port; _pywrap_util_port.IsMklEnabled() or exit(1); import horovod.tensorflow as hvd'; then
+  echo "PASS: Horovod with MKL is enabled"
+elif python -c 'from tensorflow.python import pywrap_tensorflow; pywrap_tensorflow.IsMklEnabled() or exit(1); import horovod.tensorflow as hvd'; then
+  echo "PASS: Horovod with Old MKL is detected"
+else
+  echo "FAIL: Horovod with MKL is not enabled"
+  exit 1
+fi