Adding OneDNN+MPI+Horovod partials and dockerfiles

This commit is contained in:
Abolfazl Shahbazi 2020-07-16 16:43:33 -07:00
parent 34f2782a79
commit a7580dc7f2
25 changed files with 1520 additions and 434 deletions

View File

@ -1,183 +0,0 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
# Ubuntu release of the base image; override with --build-arg UBUNTU_VERSION=<tag>.
ARG UBUNTU_VERSION=18.04
FROM ubuntu:${UBUNTU_VERSION} AS base
# Build tooling, JDK 8 and development libraries. The apt caches are removed
# in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libcurl3-dev \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libzmq3-dev \
        openjdk-8-jdk \
        openjdk-8-jre-headless \
        pkg-config \
        rsync \
        software-properties-common \
        sudo \
        unzip \
        zip \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
ENV CI_BUILD_PYTHON=python
# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version
ARG CACHE_STOP=1
# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
ARG CHECKOUT_TF_SRC=0
# In case of Python 2.7+ we need to add passwd entries for user and group id
# NOTE(review): this leaves /etc/passwd and /etc/group world-writable in the image.
RUN chmod a+w /etc/passwd /etc/group
# An explicit `if` replaces `test ... && git clone ... || true`: skipping the
# checkout still succeeds, but a clone that was requested and failed now
# aborts the build instead of being silently ignored.
RUN if [ "${CHECKOUT_TF_SRC}" -eq 1 ]; then \
        git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src; \
    fi
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Install the interpreter and pip; drop the apt caches in the same layer so
# they do not persist in the image.
RUN apt-get update && apt-get install -y \
        python3 \
        python3-pip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
RUN python3 -m pip --no-cache-dir install --upgrade \
        pip \
        setuptools
# Some TF tools expect a "python" binary
RUN ln -s "$(which python3)" /usr/local/bin/python
# Development tools installed from apt; the apt caches are removed in the
# same layer so they do not persist in the image.
RUN apt-get update && apt-get install -y \
        build-essential \
        curl \
        git \
        wget \
        openjdk-8-jdk \
        python3-dev \
        virtualenv \
        swig && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Python packages. scikit-learn is requested under its real project name: the
# "sklearn" alias on PyPI is deprecated and its installation now fails.
RUN python3 -m pip --no-cache-dir install \
        Pillow \
        h5py \
        keras_preprocessing \
        matplotlib \
        mock \
        'numpy<1.19.0' \
        scipy \
        scikit-learn \
        pandas \
        future \
        portpicker \
        enum34
# Install bazel
# Fetches the release installer for BAZEL_VERSION together with the project
# licence, runs the installer, then deletes it within the same layer.
ARG BAZEL_VERSION=3.1.0
RUN mkdir /bazel \
    && wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" \
    && wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" \
    && chmod +x /bazel/installer.sh \
    && /bazel/installer.sh \
    && rm -f /bazel/installer.sh
# install libnuma, openssh, wget
# This stage is built FROM ubuntu, so apt-get is the only package manager
# available. The former yum fallback could only run after an apt-get failure,
# and then reported it as "Unsupported Linux distribution"; apt-get errors now
# fail the build directly.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libnuma-dev \
        openssh-server \
        openssh-client \
        wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install Open MPI
# Download a release version from the official website, as the openmpi GitHub
# master is not always stable.
# OPENMPI_VERSION is kept for backward compatibility with existing
# --build-arg usage; the build itself depends only on OPENMPI_DOWNLOAD_URL.
ARG OPENMPI_VERSION=openmpi-4.0.0
ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz
# The tarball is saved under a fixed name and unpacked with its top-level
# directory stripped, so overriding only the URL no longer breaks the
# extraction and `cd` steps.
RUN mkdir -p /tmp/openmpi/src && \
    cd /tmp/openmpi && \
    wget -O openmpi.tar.gz "${OPENMPI_DOWNLOAD_URL}" && \
    tar zxf openmpi.tar.gz -C src --strip-components=1 && \
    cd src && \
    ./configure --enable-orterun-prefix-by-default && \
    make -j "$(nproc)" all && \
    make install && \
    ldconfig && \
    rm -rf /tmp/openmpi
# Create a wrapper for OpenMPI to allow running as root by default
# The wrapper uses `exec` so the real mpirun replaces the wrapper shell and
# receives signals sent to the launched process directly.
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/local/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
    chmod a+x /usr/local/bin/mpirun
# Configure OpenMPI to run good defaults:
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
# MPI communicates between containers over OpenSSH; create the sshd runtime
# directory.
RUN mkdir -p /var/run/sshd
# Rewrite ssh_config without any existing StrictHostKeyChecking lines and
# append "no", so OpenSSH talks to containers without asking for confirmation.
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new \
    && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
    && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1
ARG CHECKOUT_HOROVOD_SRC=0
# An explicit `if` replaces `test ... && git clone ... || true`, so a clone
# that was requested and failed aborts the build instead of being ignored.
RUN if [ "${CHECKOUT_HOROVOD_SRC}" -eq 1 ]; then \
        git clone --recursive https://github.com/uber/horovod.git /horovod_src; \
    fi
# Install the shared bashrc and make it accessible to every user.
COPY bashrc /etc/bash.bashrc
RUN chmod a+rwx /etc/bash.bashrc
RUN python3 -m pip install --no-cache-dir jupyter matplotlib
# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422
RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0
RUN jupyter serverextension enable --py jupyter_http_over_ws
RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/
RUN mkdir /.local && chmod a+rwx /.local
WORKDIR /tf/tensorflow-tutorials
# Fetch the tutorial notebooks in a single layer: refresh the package lists
# before installing, download every notebook (stopping at the first failure),
# then remove wget and the apt caches in the same layer.
# git stays installed because some examples require it to fetch dependencies.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git wget && \
    for notebook in \
        classification \
        overfit_and_underfit \
        regression \
        save_and_load \
        text_classification \
        text_classification_with_hub; \
    do \
        wget "https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/${notebook}.ipynb" || exit 1; \
    done && \
    apt-get autoremove -y && \
    apt-get remove -y wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
COPY readme-for-jupyter.md README.md
WORKDIR /tf
EXPOSE 8888
RUN python3 -m ipykernel.kernelspec
# `exec` replaces the bash wrapper with the notebook server, so the server
# receives container stop signals directly.
CMD ["bash", "-c", "source /etc/bash.bashrc && exec jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"]

View File

@ -1,4 +1,4 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -19,11 +19,13 @@
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
ARG UBUNTU_VERSION=18.04
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS base
RUN apt-get update && apt-get install -y --no-install-recommends \
ARG DEBIAN_FRONTEND="noninteractive"
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
build-essential \
curl \
git \
@ -50,14 +52,13 @@ ENV CI_BUILD_PYTHON python
ARG CACHE_STOP=1
# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
ARG CHECKOUT_TF_SRC=0
# In case of Python 2.7+ we need to add passwd entries for user and group id
RUN chmod a+w /etc/passwd /etc/group
RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true
ARG TF_BRANCH=master
RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
RUN apt-get update && apt-get install -y \
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
python3 \
python3-pip
@ -68,78 +69,37 @@ RUN python3 -m pip --no-cache-dir install --upgrade \
# Some TF tools expect a "python" binary
RUN ln -s $(which python3) /usr/local/bin/python
RUN apt-get update && apt-get install -y \
build-essential \
curl \
git \
wget \
openjdk-8-jdk \
python3-dev \
virtualenv \
swig
RUN python3 -m pip --no-cache-dir install \
Pillow \
h5py \
keras_preprocessing \
matplotlib \
mock \
'numpy<1.19.0' \
scipy \
sklearn \
pandas \
future \
portpicker \
enum34
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
curl
# Install bazel
ARG BAZEL_VERSION=3.1.0
RUN mkdir /bazel && \
wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
chmod +x /bazel/installer.sh && \
/bazel/installer.sh && \
curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
bash /bazel/installer.sh && \
rm -f /bazel/installer.sh
# install libnuma, openssh, wget
RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \
libnuma-dev \
openssh-server \
openssh-client \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* ) || \
( yum -y update && yum -y install \
numactl-devel \
openssh-server \
openssh-clients \
wget && \
yum clean all ) || \
( echo "Unsupported Linux distribution. Aborting!" && exit 1 )
ARG DEBIAN_FRONTEND="noninteractive"
# Install Open MPI
# download a release version from the official website, as the openmpi GitHub master is not always stable
ARG OPENMPI_VERSION=openmpi-4.0.0
ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz
RUN mkdir /tmp/openmpi && \
cd /tmp/openmpi && \
wget ${OPENMPI_DOWNLOAD_URL} && \
tar zxf ${OPENMPI_VERSION}.tar.gz && \
cd ${OPENMPI_VERSION} && \
./configure --enable-orterun-prefix-by-default && \
make -j $(nproc) all && \
make install && \
ldconfig && \
rm -rf /tmp/openmpi
# install Open MPI and the OpenSSH client/server
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
libopenmpi-dev \
openmpi-bin \
openmpi-common \
openssh-client \
openssh-server && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
echo '#!/bin/bash' > /usr/local/bin/mpirun && \
echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
chmod a+x /usr/local/bin/mpirun
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
echo '#!/bin/bash' > /usr/bin/mpirun && \
echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
chmod a+x /usr/bin/mpirun
# Configure OpenMPI to run good defaults:
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# Install OpenSSH for MPI to communicate between containers
RUN mkdir -p /var/run/sshd
@ -151,7 +111,22 @@ RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_confi
# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1
ARG CHECKOUT_HOROVOD_SRC=0
RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --recursive https://github.com/uber/horovod.git /horovod_src || true
ARG HOROVOD_BRANCH=master
RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true
COPY bashrc /etc/bash.bashrc
RUN chmod a+rwx /etc/bash.bashrc
RUN python3 -m pip install --no-cache-dir jupyter matplotlib
# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422
RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0
RUN jupyter serverextension enable --py jupyter_http_over_ws
RUN mkdir -p /tf/ && chmod -R a+rwx /tf/
RUN mkdir /.local && chmod a+rwx /.local
WORKDIR /tf
EXPOSE 8888
RUN python3 -m ipykernel.kernelspec
CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"]

View File

@ -0,0 +1,118 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
# Ubuntu release of the base image; override with --build-arg UBUNTU_VERSION=<tag>.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS base
# Build-time only: keeps package configuration from prompting during installs.
ARG DEBIAN_FRONTEND="noninteractive"
# Build tooling, JDK 8 and development libraries. The apt caches are removed
# in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        curl \
        git \
        libcurl3-dev \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libzmq3-dev \
        openjdk-8-jdk \
        openjdk-8-jre-headless \
        pkg-config \
        rsync \
        software-properties-common \
        sudo \
        unzip \
        zip \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
ENV CI_BUILD_PYTHON=python
# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version
ARG CACHE_STOP=1
# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
ARG CHECKOUT_TF_SRC=0
# Branch to check out; only used when CHECKOUT_TF_SRC=1.
ARG TF_BRANCH=master
# An explicit `if` replaces `test ... && git clone ... || true`: skipping the
# checkout still succeeds, but a clone that was requested and failed (network
# error, unknown branch) now aborts the build instead of being ignored.
RUN if [ "${CHECKOUT_TF_SRC}" -eq 1 ]; then \
        git clone https://github.com/tensorflow/tensorflow.git \
            --branch "${TF_BRANCH}" --single-branch /tensorflow_src; \
    fi
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Install the interpreter and pip; drop the apt caches in the same layer so
# they do not persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        python3 \
        python3-pip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
RUN python3 -m pip --no-cache-dir install --upgrade \
        pip \
        setuptools
# Some TF tools expect a "python" binary
RUN ln -s "$(which python3)" /usr/local/bin/python
# curl is needed to download the bazel installer; the apt caches are removed
# in the same layer so they do not persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        curl && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install bazel
# Downloads the release installer for BAZEL_VERSION and the project licence,
# runs the installer, then deletes it within the same layer.
ARG BAZEL_VERSION=3.1.0
RUN mkdir /bazel && \
    curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
    curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
    bash /bazel/installer.sh && \
    rm -f /bazel/installer.sh
# Build-time only: keeps package configuration from prompting during installs.
ARG DEBIAN_FRONTEND="noninteractive"
# Install the distribution's Open MPI packages and the OpenSSH client/server
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libopenmpi-dev \
        openmpi-bin \
        openmpi-common \
        openssh-client \
        openssh-server && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default
# The wrapper uses `exec` so the real mpirun replaces the wrapper shell and
# receives signals sent to the launched process directly.
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
    chmod a+x /usr/bin/mpirun
# Configure OpenMPI to run good defaults:
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# MPI communicates between containers over OpenSSH; create the sshd runtime
# directory.
RUN mkdir -p /var/run/sshd
# Rewrite ssh_config without any existing StrictHostKeyChecking lines and
# append "no", so OpenSSH talks to containers without asking for confirmation.
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new \
    && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
    && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1
ARG CHECKOUT_HOROVOD_SRC=0
# Branch to check out; only used when CHECKOUT_HOROVOD_SRC=1.
ARG HOROVOD_BRANCH=master
# An explicit `if` replaces `test ... && git clone ... || true`, so a clone
# that was requested and failed aborts the build instead of being ignored.
RUN if [ "${CHECKOUT_HOROVOD_SRC}" -eq 1 ]; then \
        git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive \
            https://github.com/uber/horovod.git /horovod_src; \
    fi
# Install the shared bashrc and make it accessible to every user.
COPY bashrc /etc/bash.bashrc
RUN chmod a+rwx /etc/bash.bashrc

View File

@ -1,4 +1,4 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -19,16 +19,14 @@
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
ARG UBUNTU_VERSION=18.04
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} as base
RUN apt-get update && apt-get install -y curl
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
RUN apt-get update && apt-get install -y \
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
python3 \
python3-pip
@ -50,45 +48,26 @@ ARG TF_PACKAGE=tensorflow
ARG TF_PACKAGE_VERSION=
RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
# install libnuma, openssh, wget
RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \
libnuma-dev \
openssh-server \
openssh-client \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* ) || \
( yum -y update && yum -y install \
numactl-devel \
openssh-server \
openssh-clients \
wget && \
yum clean all ) || \
( echo "Unsupported Linux distribution. Aborting!" && exit 1 )
ARG DEBIAN_FRONTEND="noninteractive"
# Install Open MPI
# download a release version from the official website, as the openmpi GitHub master is not always stable
ARG OPENMPI_VERSION=openmpi-4.0.0
ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz
RUN mkdir /tmp/openmpi && \
cd /tmp/openmpi && \
wget ${OPENMPI_DOWNLOAD_URL} && \
tar zxf ${OPENMPI_VERSION}.tar.gz && \
cd ${OPENMPI_VERSION} && \
./configure --enable-orterun-prefix-by-default && \
make -j $(nproc) all && \
make install && \
ldconfig && \
rm -rf /tmp/openmpi
# install Open MPI and the OpenSSH client/server
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
libopenmpi-dev \
openmpi-bin \
openmpi-common \
openssh-client \
openssh-server && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
echo '#!/bin/bash' > /usr/local/bin/mpirun && \
echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
chmod a+x /usr/local/bin/mpirun
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
echo '#!/bin/bash' > /usr/bin/mpirun && \
echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
chmod a+x /usr/bin/mpirun
# Configure OpenMPI to run good defaults:
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# Install OpenSSH for MPI to communicate between containers
RUN mkdir -p /var/run/sshd
@ -99,8 +78,26 @@ RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_confi
mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Install Horovod
ARG HOROVOD_VERSION=0.16.4
RUN python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION}
ARG HOROVOD_WITHOUT_PYTORCH=1
ARG HOROVOD_WITHOUT_MXNET=1
ARG HOROVOD_WITH_TENSORFLOW=1
ARG HOROVOD_VERSION=
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
software-properties-common
RUN add-apt-repository ppa:ubuntu-toolchain-r/test
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
build-essential \
g++-8 \
gcc-8 \
python3-dev
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 500 --slave /usr/bin/g++ g++ /usr/bin/g++-5 && \
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}
COPY bashrc /etc/bash.bashrc
RUN chmod a+rwx /etc/bash.bashrc
@ -110,20 +107,8 @@ RUN python3 -m pip install --no-cache-dir jupyter matplotlib
RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0
RUN jupyter serverextension enable --py jupyter_http_over_ws
RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/
RUN mkdir -p /tf/ && chmod -R a+rwx /tf/
RUN mkdir /.local && chmod a+rwx /.local
RUN apt-get install -y --no-install-recommends wget
# some examples require git to fetch dependencies
RUN apt-get install -y --no-install-recommends git
WORKDIR /tf/tensorflow-tutorials
RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb
RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb
RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/regression.ipynb
RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/save_and_load.ipynb
RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/text_classification.ipynb
RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/text_classification_with_hub.ipynb
COPY readme-for-jupyter.md README.md
RUN apt-get autoremove -y && apt-get remove -y wget
WORKDIR /tf
EXPOSE 8888

View File

@ -1,4 +1,4 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -19,16 +19,14 @@
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
ARG UBUNTU_VERSION=18.04
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} as base
RUN apt-get update && apt-get install -y curl
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
RUN apt-get update && apt-get install -y \
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
python3 \
python3-pip
@ -50,45 +48,26 @@ ARG TF_PACKAGE=tensorflow
ARG TF_PACKAGE_VERSION=
RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
# install libnuma, openssh, wget
RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \
libnuma-dev \
openssh-server \
openssh-client \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* ) || \
( yum -y update && yum -y install \
numactl-devel \
openssh-server \
openssh-clients \
wget && \
yum clean all ) || \
( echo "Unsupported Linux distribution. Aborting!" && exit 1 )
ARG DEBIAN_FRONTEND="noninteractive"
# Install Open MPI
# download a release version from the official website, as the openmpi GitHub master is not always stable
ARG OPENMPI_VERSION=openmpi-4.0.0
ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz
RUN mkdir /tmp/openmpi && \
cd /tmp/openmpi && \
wget ${OPENMPI_DOWNLOAD_URL} && \
tar zxf ${OPENMPI_VERSION}.tar.gz && \
cd ${OPENMPI_VERSION} && \
./configure --enable-orterun-prefix-by-default && \
make -j $(nproc) all && \
make install && \
ldconfig && \
rm -rf /tmp/openmpi
# install Open MPI and the OpenSSH client/server
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
libopenmpi-dev \
openmpi-bin \
openmpi-common \
openssh-client \
openssh-server && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
echo '#!/bin/bash' > /usr/local/bin/mpirun && \
echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
chmod a+x /usr/local/bin/mpirun
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
echo '#!/bin/bash' > /usr/bin/mpirun && \
echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
chmod a+x /usr/bin/mpirun
# Configure OpenMPI to run good defaults:
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# Install OpenSSH for MPI to communicate between containers
RUN mkdir -p /var/run/sshd
@ -99,8 +78,26 @@ RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_confi
mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Install Horovod
ARG HOROVOD_VERSION=0.16.4
RUN python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION}
ARG HOROVOD_WITHOUT_PYTORCH=1
ARG HOROVOD_WITHOUT_MXNET=1
ARG HOROVOD_WITH_TENSORFLOW=1
ARG HOROVOD_VERSION=
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
software-properties-common
RUN add-apt-repository ppa:ubuntu-toolchain-r/test
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
build-essential \
g++-8 \
gcc-8 \
python3-dev
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 500 --slave /usr/bin/g++ g++ /usr/bin/g++-5 && \
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}
COPY bashrc /etc/bash.bashrc
RUN chmod a+rwx /etc/bash.bashrc

View File

@ -0,0 +1,132 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
# Ubuntu release of the base image; override with --build-arg UBUNTU_VERSION=<tag>.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS base
# Build-time only: keeps package configuration from prompting during installs.
ARG DEBIAN_FRONTEND="noninteractive"
# Build tooling, JDK 8 and development libraries. The apt caches are removed
# in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        curl \
        git \
        libcurl3-dev \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libzmq3-dev \
        openjdk-8-jdk \
        openjdk-8-jre-headless \
        pkg-config \
        rsync \
        software-properties-common \
        sudo \
        unzip \
        zip \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
ENV CI_BUILD_PYTHON=python
# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version
ARG CACHE_STOP=1
# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
ARG CHECKOUT_TF_SRC=0
# Branch to check out; only used when CHECKOUT_TF_SRC=1.
ARG TF_BRANCH=master
# An explicit `if` replaces `test ... && git clone ... || true`: skipping the
# checkout still succeeds, but a clone that was requested and failed (network
# error, unknown branch) now aborts the build instead of being ignored.
RUN if [ "${CHECKOUT_TF_SRC}" -eq 1 ]; then \
        git clone https://github.com/tensorflow/tensorflow.git \
            --branch "${TF_BRANCH}" --single-branch /tensorflow_src; \
    fi
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Install the interpreter and pip; drop the apt caches in the same layer so
# they do not persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        python3 \
        python3-pip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
RUN python3 -m pip --no-cache-dir install --upgrade \
        pip \
        setuptools
# Some TF tools expect a "python" binary
RUN ln -s "$(which python3)" /usr/local/bin/python
# curl is needed to download the bazel installer; the apt caches are removed
# in the same layer so they do not persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        curl && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install bazel
# Downloads the release installer for BAZEL_VERSION and the project licence,
# runs the installer, then deletes it within the same layer.
ARG BAZEL_VERSION=3.1.0
RUN mkdir /bazel && \
    curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
    curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
    bash /bazel/installer.sh && \
    rm -f /bazel/installer.sh
# Build-time only: keeps package configuration from prompting during installs.
ARG DEBIAN_FRONTEND="noninteractive"
# Install the distribution's Open MPI packages and the OpenSSH client/server
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libopenmpi-dev \
        openmpi-bin \
        openmpi-common \
        openssh-client \
        openssh-server && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default
# The wrapper uses `exec` so the real mpirun replaces the wrapper shell and
# receives signals sent to the launched process directly.
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
    chmod a+x /usr/bin/mpirun
# Configure OpenMPI to run good defaults:
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# MPI communicates between containers over OpenSSH; create the sshd runtime
# directory.
RUN mkdir -p /var/run/sshd
# Rewrite ssh_config without any existing StrictHostKeyChecking lines and
# append "no", so OpenSSH talks to containers without asking for confirmation.
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new \
    && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
    && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1
ARG CHECKOUT_HOROVOD_SRC=0
# Branch to check out; only used when CHECKOUT_HOROVOD_SRC=1.
ARG HOROVOD_BRANCH=master
# An explicit `if` replaces `test ... && git clone ... || true`, so a clone
# that was requested and failed aborts the build instead of being ignored.
RUN if [ "${CHECKOUT_HOROVOD_SRC}" -eq 1 ]; then \
        git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive \
            https://github.com/uber/horovod.git /horovod_src; \
    fi
# Install the shared bashrc and make it accessible to every user.
COPY bashrc /etc/bash.bashrc
RUN chmod a+rwx /etc/bash.bashrc
# Jupyter plus the http-over-websocket server extension.
RUN python3 -m pip install --no-cache-dir jupyter matplotlib
# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422
RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0
RUN jupyter serverextension enable --py jupyter_http_over_ws
# Notebook directory and /.local are made accessible to every user.
RUN mkdir -p /tf/ && chmod -R a+rwx /tf/
RUN mkdir /.local && chmod a+rwx /.local
WORKDIR /tf
EXPOSE 8888
RUN python3 -m ipykernel.kernelspec
# `exec` replaces the bash wrapper with the notebook server, so the server
# receives container stop signals directly.
CMD ["bash", "-c", "source /etc/bash.bashrc && exec jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"]

View File

@ -0,0 +1,118 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
# Ubuntu release of the base image; override with --build-arg UBUNTU_VERSION=<tag>.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS base
# Build-time only: keeps package configuration from prompting during installs.
ARG DEBIAN_FRONTEND="noninteractive"
# Build tooling, JDK 8 and development libraries. The apt caches are removed
# in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        curl \
        git \
        libcurl3-dev \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libzmq3-dev \
        openjdk-8-jdk \
        openjdk-8-jre-headless \
        pkg-config \
        rsync \
        software-properties-common \
        sudo \
        unzip \
        zip \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
ENV CI_BUILD_PYTHON=python
# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version
ARG CACHE_STOP=1
# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
ARG CHECKOUT_TF_SRC=0
# Branch to check out; only used when CHECKOUT_TF_SRC=1.
ARG TF_BRANCH=master
# An explicit `if` replaces `test ... && git clone ... || true`: skipping the
# checkout still succeeds, but a clone that was requested and failed (network
# error, unknown branch) now aborts the build instead of being ignored.
RUN if [ "${CHECKOUT_TF_SRC}" -eq 1 ]; then \
        git clone https://github.com/tensorflow/tensorflow.git \
            --branch "${TF_BRANCH}" --single-branch /tensorflow_src; \
    fi
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Install the interpreter and pip; drop the apt caches in the same layer so
# they do not persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        python3 \
        python3-pip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
RUN python3 -m pip --no-cache-dir install --upgrade \
        pip \
        setuptools
# Some TF tools expect a "python" binary
RUN ln -s "$(which python3)" /usr/local/bin/python
# curl is needed to download the bazel installer; the apt caches are removed
# in the same layer so they do not persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        curl && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install bazel
# Downloads the release installer for BAZEL_VERSION and the project licence,
# runs the installer, then deletes it within the same layer.
ARG BAZEL_VERSION=3.1.0
RUN mkdir /bazel && \
    curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
    curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
    bash /bazel/installer.sh && \
    rm -f /bazel/installer.sh
# Build-time only: keeps package configuration from prompting during installs.
ARG DEBIAN_FRONTEND="noninteractive"
# Install the distribution's Open MPI packages and the OpenSSH client/server
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libopenmpi-dev \
        openmpi-bin \
        openmpi-common \
        openssh-client \
        openssh-server && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default
# The wrapper uses `exec` so the real mpirun replaces the wrapper shell and
# receives signals sent to the launched process directly.
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
    chmod a+x /usr/bin/mpirun
# Configure OpenMPI to run good defaults:
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# MPI communicates between containers over OpenSSH; create the sshd runtime
# directory.
RUN mkdir -p /var/run/sshd
# Rewrite ssh_config without any existing StrictHostKeyChecking lines and
# append "no", so OpenSSH talks to containers without asking for confirmation.
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new \
    && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
    && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1
ARG CHECKOUT_HOROVOD_SRC=0
# Branch to check out; only used when CHECKOUT_HOROVOD_SRC=1.
ARG HOROVOD_BRANCH=master
# An explicit `if` replaces `test ... && git clone ... || true`, so a clone
# that was requested and failed aborts the build instead of being ignored.
RUN if [ "${CHECKOUT_HOROVOD_SRC}" -eq 1 ]; then \
        git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive \
            https://github.com/uber/horovod.git /horovod_src; \
    fi
# Install the shared bashrc and make it accessible to every user.
COPY bashrc /etc/bash.bashrc
RUN chmod a+rwx /etc/bash.bashrc

View File

@ -0,0 +1,112 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
# Ubuntu release used as the base image; override with --build-arg UBUNTU_VERSION=<version>.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS base
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Install Python 3 and pip; the apt lists are removed in the same layer so
# they do not stay in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        python3 \
        python3-pip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
RUN python3 -m pip --no-cache-dir install --upgrade \
    pip \
    setuptools
# Some TF tools expect a "python" binary
RUN ln -s $(which python3) /usr/local/bin/python
# Name of the TensorFlow pip package to install. Options:
# tensorflow
# tensorflow-gpu
# tf-nightly
# tf-nightly-gpu
ARG TF_PACKAGE=tensorflow
# Empty installs the latest version; set e.g.
# --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
ARG TF_PACKAGE_VERSION=
# "${VAR:+==${VAR}}" adds "==<version>" only when a version was given.
RUN python3 -m pip install --no-cache-dir \
    ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
ARG DEBIAN_FRONTEND="noninteractive"
# Install OpenMPI (runtime and development files) and the OpenSSH client and
# server. The apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libopenmpi-dev \
        openmpi-bin \
        openmpi-common \
        openssh-client \
        openssh-server && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default. The
# wrapper exec's the real binary so signals sent to "mpirun" reach it directly
# and no intermediate bash process is left running.
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
    chmod a+x /usr/bin/mpirun
# Configure OpenMPI with good defaults: exclude the loopback and docker0
# interfaces from the TCP transport.
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate
# between containers.
RUN mkdir -p /var/run/sshd
# Allow OpenSSH to talk to containers without asking for confirmation: drop
# any existing StrictHostKeyChecking line and append an explicit "no".
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Install Horovod
# These build args are visible as environment variables to the RUN steps
# below, where Horovod's build reads them: TensorFlow support on, PyTorch and
# MXNet support off.
ARG HOROVOD_WITHOUT_PYTORCH=1
ARG HOROVOD_WITHOUT_MXNET=1
ARG HOROVOD_WITH_TENSORFLOW=1
# Empty installs the latest release; set --build-arg HOROVOD_VERSION=<version> to pin one.
ARG HOROVOD_VERSION=
# Compiler toolchain and Python headers. The apt lists are removed in the same
# layer so they do not stay in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        g++-8 \
        gcc-8 \
        python3-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Register gcc/g++ 7 and 8 as alternatives; gcc-8 has the higher priority and
# so becomes the default compiler.
# NOTE(review): update-alternatives requires /usr/bin/gcc-7 to exist, so this
# assumes a base release whose build-essential provides gcc-7 -- confirm
# against the UBUNTU_VERSION actually used for the build.
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 700 --slave /usr/bin/g++ g++ /usr/bin/g++-7 && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}
# Install the bashrc from the build context as the system-wide bash config
# and open its permissions to every user.
COPY bashrc /etc/bash.bashrc
RUN chmod ugo+rwx /etc/bash.bashrc
# Jupyter and matplotlib for notebook use.
RUN python3 -m pip install --no-cache-dir \
    jupyter \
    matplotlib
# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422
RUN python3 -m pip install --no-cache-dir \
    jupyter_http_over_ws \
    ipykernel==5.1.1 \
    nbformat==4.4.0
RUN jupyter serverextension enable --py jupyter_http_over_ws
# World-writable notebook directory and /.local (presumably so the image also
# works when run as an arbitrary non-root user -- confirm).
RUN mkdir -p /tf/ && chmod -R ugo+rwx /tf/
RUN mkdir /.local && chmod ugo+rwx /.local
WORKDIR /tf
EXPOSE 8888
RUN python3 -m ipykernel.kernelspec
# Source the bashrc, then serve notebooks from /tf on all interfaces.
CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"]

View File

@ -0,0 +1,98 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
# Ubuntu release used as the base image; override with --build-arg UBUNTU_VERSION=<version>.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS base
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Install Python 3 and pip; the apt lists are removed in the same layer so
# they do not stay in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        python3 \
        python3-pip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
RUN python3 -m pip --no-cache-dir install --upgrade \
    pip \
    setuptools
# Some TF tools expect a "python" binary
RUN ln -s $(which python3) /usr/local/bin/python
# Name of the TensorFlow pip package to install. Options:
# tensorflow
# tensorflow-gpu
# tf-nightly
# tf-nightly-gpu
ARG TF_PACKAGE=tensorflow
# Empty installs the latest version; set e.g.
# --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
ARG TF_PACKAGE_VERSION=
# "${VAR:+==${VAR}}" adds "==<version>" only when a version was given.
RUN python3 -m pip install --no-cache-dir \
    ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
ARG DEBIAN_FRONTEND="noninteractive"
# Install OpenMPI (runtime and development files) and the OpenSSH client and
# server. The apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libopenmpi-dev \
        openmpi-bin \
        openmpi-common \
        openssh-client \
        openssh-server && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default. The
# wrapper exec's the real binary so signals sent to "mpirun" reach it directly
# and no intermediate bash process is left running.
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
    chmod a+x /usr/bin/mpirun
# Configure OpenMPI with good defaults: exclude the loopback and docker0
# interfaces from the TCP transport.
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate
# between containers.
RUN mkdir -p /var/run/sshd
# Allow OpenSSH to talk to containers without asking for confirmation: drop
# any existing StrictHostKeyChecking line and append an explicit "no".
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Install Horovod
# These build args are visible as environment variables to the RUN steps
# below, where Horovod's build reads them: TensorFlow support on, PyTorch and
# MXNet support off.
ARG HOROVOD_WITHOUT_PYTORCH=1
ARG HOROVOD_WITHOUT_MXNET=1
ARG HOROVOD_WITH_TENSORFLOW=1
# Empty installs the latest release; set --build-arg HOROVOD_VERSION=<version> to pin one.
ARG HOROVOD_VERSION=
# Compiler toolchain and Python headers. The apt lists are removed in the same
# layer so they do not stay in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        g++-8 \
        gcc-8 \
        python3-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Register gcc/g++ 7 and 8 as alternatives; gcc-8 has the higher priority and
# so becomes the default compiler.
# NOTE(review): update-alternatives requires /usr/bin/gcc-7 to exist, so this
# assumes a base release whose build-essential provides gcc-7 -- confirm
# against the UBUNTU_VERSION actually used for the build.
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 700 --slave /usr/bin/g++ g++ /usr/bin/g++-7 && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}
# Install the bashrc from the build context as the system-wide bash config.
COPY bashrc /etc/bash.bashrc
RUN chmod a+rwx /etc/bash.bashrc

View File

@ -57,17 +57,27 @@ RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/t
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
ARG PYTHON=python3
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
python3 \
python3-pip
curl \
software-properties-common
RUN python3 -m pip --no-cache-dir install --upgrade \
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get install -y --no-install-recommends --fix-missing \
${PYTHON}
RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7
RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \
pip \
setuptools
# Some TF tools expect a "python" binary
RUN ln -s $(which python3) /usr/local/bin/python
RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \
ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \
ln -sf $(which ${PYTHON}) /usr/bin/python && \
ln -sf $(which ${PYTHON}) /usr/bin/python3
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
curl

View File

@ -0,0 +1,142 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
# Ubuntu release used as the base image; override with --build-arg UBUNTU_VERSION=<version>.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS base
ARG DEBIAN_FRONTEND="noninteractive"
# Build toolchain, libraries and utilities for the development image
# (package list sorted alphabetically). The apt lists are removed in the same
# layer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        curl \
        git \
        libcurl3-dev \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libzmq3-dev \
        openjdk-8-jdk \
        openjdk-8-jre-headless \
        pkg-config \
        rsync \
        software-properties-common \
        sudo \
        unzip \
        zip \
        zlib1g-dev \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
ENV CI_BUILD_PYTHON=python
# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version
ARG CACHE_STOP=1
# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
ARG CHECKOUT_TF_SRC=0
# Branch (or tag) of the TensorFlow repository to clone.
ARG TF_BRANCH=master
# An explicit "if" is used instead of "test && clone || true" so that a failed
# clone fails the build rather than producing an image without /tensorflow_src.
RUN if [ "${CHECKOUT_TF_SRC}" -eq 1 ]; then \
        git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src; \
    fi
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Interpreter to install and use as the default, e.g. --build-arg PYTHON=python3.7
ARG PYTHON=python3
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        curl \
        software-properties-common
# Add the deadsnakes PPA, refresh the package index and install the
# interpreter in a single layer, so the install never runs against a stale
# cached index. The apt lists are removed afterwards.
RUN add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends --fix-missing \
        ${PYTHON} && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Bootstrap pip with the interpreter selected by PYTHON (previously a
# hard-coded python3.7). The script is downloaded to a file first so that a
# failed download fails the build instead of being hidden by a pipe.
RUN curl -fSsL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py && \
    ${PYTHON} /tmp/get-pip.py && \
    rm -f /tmp/get-pip.py
RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \
    pip \
    setuptools
# Some TF tools expect a "python" binary
# The interpreter path is resolved once, before any link exists: /usr/local/bin
# normally precedes /usr/bin on PATH, so calling `which` again after the first
# links are created can return one of the new links and, with PYTHON=python3,
# turn /usr/bin/python3 into a symlink loop.
RUN PYTHON_BIN="$(readlink -f "$(which ${PYTHON})")" && \
    ln -sf "${PYTHON_BIN}" /usr/local/bin/python && \
    ln -sf "${PYTHON_BIN}" /usr/local/bin/python3 && \
    ln -sf "${PYTHON_BIN}" /usr/bin/python && \
    ln -sf "${PYTHON_BIN}" /usr/bin/python3
# curl is used below to download the Bazel installer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        curl
# Install bazel: fetch the release installer and the license into /bazel,
# run the installer, then delete it.
ARG BAZEL_VERSION=3.1.0
RUN mkdir /bazel && \
    curl --fail --show-error --silent --location --output /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
    curl --fail --show-error --silent --location --output /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
    bash /bazel/installer.sh && \
    rm -f /bazel/installer.sh
ARG DEBIAN_FRONTEND="noninteractive"
# Install OpenMPI (runtime and development files) and the OpenSSH client and
# server. The apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libopenmpi-dev \
        openmpi-bin \
        openmpi-common \
        openssh-client \
        openssh-server && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default. The
# wrapper exec's the real binary so signals sent to "mpirun" reach it directly
# and no intermediate bash process is left running.
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
    chmod a+x /usr/bin/mpirun
# Configure OpenMPI with good defaults: exclude the loopback and docker0
# interfaces from the TCP transport.
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate
# between containers.
RUN mkdir -p /var/run/sshd
# Allow OpenSSH to talk to containers without asking for confirmation: drop
# any existing StrictHostKeyChecking line and append an explicit "no".
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1
ARG CHECKOUT_HOROVOD_SRC=0
# Branch (or tag) of the Horovod repository to clone.
ARG HOROVOD_BRANCH=master
# An explicit "if" is used instead of "test && clone || true" so that a failed
# clone fails the build rather than producing an image without /horovod_src.
RUN if [ "${CHECKOUT_HOROVOD_SRC}" -eq 1 ]; then \
        git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src; \
    fi
# Install the bashrc from the build context as the system-wide bash config
# and open its permissions to every user.
COPY bashrc /etc/bash.bashrc
RUN chmod ugo+rwx /etc/bash.bashrc
# Jupyter and matplotlib for notebook use.
RUN python3 -m pip install --no-cache-dir \
    jupyter \
    matplotlib
# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422
RUN python3 -m pip install --no-cache-dir \
    jupyter_http_over_ws \
    ipykernel==5.1.1 \
    nbformat==4.4.0
RUN jupyter serverextension enable --py jupyter_http_over_ws
# World-writable notebook directory and /.local (presumably so the image also
# works when run as an arbitrary non-root user -- confirm).
RUN mkdir -p /tf/ && chmod -R ugo+rwx /tf/
RUN mkdir /.local && chmod ugo+rwx /.local
WORKDIR /tf
EXPOSE 8888
RUN python3 -m ipykernel.kernelspec
# Source the bashrc, then serve notebooks from /tf on all interfaces.
CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"]

View File

@ -0,0 +1,128 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
# Ubuntu release used as the base image; override with --build-arg UBUNTU_VERSION=<version>.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS base
ARG DEBIAN_FRONTEND="noninteractive"
# Build toolchain, libraries and utilities for the development image
# (package list sorted alphabetically). The apt lists are removed in the same
# layer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        curl \
        git \
        libcurl3-dev \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libzmq3-dev \
        openjdk-8-jdk \
        openjdk-8-jre-headless \
        pkg-config \
        rsync \
        software-properties-common \
        sudo \
        unzip \
        zip \
        zlib1g-dev \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
ENV CI_BUILD_PYTHON=python
# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version
ARG CACHE_STOP=1
# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
ARG CHECKOUT_TF_SRC=0
# Branch (or tag) of the TensorFlow repository to clone.
ARG TF_BRANCH=master
# An explicit "if" is used instead of "test && clone || true" so that a failed
# clone fails the build rather than producing an image without /tensorflow_src.
RUN if [ "${CHECKOUT_TF_SRC}" -eq 1 ]; then \
        git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src; \
    fi
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Interpreter to install and use as the default, e.g. --build-arg PYTHON=python3.7
ARG PYTHON=python3
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        curl \
        software-properties-common
# Add the deadsnakes PPA, refresh the package index and install the
# interpreter in a single layer, so the install never runs against a stale
# cached index. The apt lists are removed afterwards.
RUN add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends --fix-missing \
        ${PYTHON} && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Bootstrap pip with the interpreter selected by PYTHON (previously a
# hard-coded python3.7). The script is downloaded to a file first so that a
# failed download fails the build instead of being hidden by a pipe.
RUN curl -fSsL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py && \
    ${PYTHON} /tmp/get-pip.py && \
    rm -f /tmp/get-pip.py
RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \
    pip \
    setuptools
# Some TF tools expect a "python" binary
# The interpreter path is resolved once, before any link exists: /usr/local/bin
# normally precedes /usr/bin on PATH, so calling `which` again after the first
# links are created can return one of the new links and, with PYTHON=python3,
# turn /usr/bin/python3 into a symlink loop.
RUN PYTHON_BIN="$(readlink -f "$(which ${PYTHON})")" && \
    ln -sf "${PYTHON_BIN}" /usr/local/bin/python && \
    ln -sf "${PYTHON_BIN}" /usr/local/bin/python3 && \
    ln -sf "${PYTHON_BIN}" /usr/bin/python && \
    ln -sf "${PYTHON_BIN}" /usr/bin/python3
# curl is used below to download the Bazel installer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        curl
# Install bazel: fetch the release installer and the license into /bazel,
# run the installer, then delete it.
ARG BAZEL_VERSION=3.1.0
RUN mkdir /bazel && \
    curl --fail --show-error --silent --location --output /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
    curl --fail --show-error --silent --location --output /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
    bash /bazel/installer.sh && \
    rm -f /bazel/installer.sh
ARG DEBIAN_FRONTEND="noninteractive"
# Install OpenMPI (runtime and development files) and the OpenSSH client and
# server. The apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libopenmpi-dev \
        openmpi-bin \
        openmpi-common \
        openssh-client \
        openssh-server && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default. The
# wrapper exec's the real binary so signals sent to "mpirun" reach it directly
# and no intermediate bash process is left running.
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
    chmod a+x /usr/bin/mpirun
# Configure OpenMPI with good defaults: exclude the loopback and docker0
# interfaces from the TCP transport.
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate
# between containers.
RUN mkdir -p /var/run/sshd
# Allow OpenSSH to talk to containers without asking for confirmation: drop
# any existing StrictHostKeyChecking line and append an explicit "no".
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1
ARG CHECKOUT_HOROVOD_SRC=0
# Branch (or tag) of the Horovod repository to clone.
ARG HOROVOD_BRANCH=master
# An explicit "if" is used instead of "test && clone || true" so that a failed
# clone fails the build rather than producing an image without /horovod_src.
RUN if [ "${CHECKOUT_HOROVOD_SRC}" -eq 1 ]; then \
        git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src; \
    fi
# Install the bashrc from the build context as the system-wide bash config.
COPY bashrc /etc/bash.bashrc
RUN chmod a+rwx /etc/bash.bashrc

View File

@ -57,17 +57,27 @@ RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/t
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
ARG PYTHON=python3
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
python3 \
python3-pip
curl \
software-properties-common
RUN python3 -m pip --no-cache-dir install --upgrade \
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get install -y --no-install-recommends --fix-missing \
${PYTHON}
RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7
RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \
pip \
setuptools
# Some TF tools expect a "python" binary
RUN ln -s $(which python3) /usr/local/bin/python
RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \
ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \
ln -sf $(which ${PYTHON}) /usr/bin/python && \
ln -sf $(which ${PYTHON}) /usr/bin/python3
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
curl

View File

@ -0,0 +1,122 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
# Ubuntu release used as the base image; override with --build-arg UBUNTU_VERSION=<version>.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS base
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Interpreter to install and use as the default, e.g. --build-arg PYTHON=python3.7
ARG PYTHON=python3
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        curl \
        software-properties-common
# Add the deadsnakes PPA, refresh the package index and install the
# interpreter in a single layer, so the install never runs against a stale
# cached index. The apt lists are removed afterwards.
RUN add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends --fix-missing \
        ${PYTHON} && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Bootstrap pip with the interpreter selected by PYTHON (previously a
# hard-coded python3.7). The script is downloaded to a file first so that a
# failed download fails the build instead of being hidden by a pipe.
RUN curl -fSsL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py && \
    ${PYTHON} /tmp/get-pip.py && \
    rm -f /tmp/get-pip.py
RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \
    pip \
    setuptools
# Some TF tools expect a "python" binary
# The interpreter path is resolved once, before any link exists: /usr/local/bin
# normally precedes /usr/bin on PATH, so calling `which` again after the first
# links are created can return one of the new links and, with PYTHON=python3,
# turn /usr/bin/python3 into a symlink loop.
RUN PYTHON_BIN="$(readlink -f "$(which ${PYTHON})")" && \
    ln -sf "${PYTHON_BIN}" /usr/local/bin/python && \
    ln -sf "${PYTHON_BIN}" /usr/local/bin/python3 && \
    ln -sf "${PYTHON_BIN}" /usr/bin/python && \
    ln -sf "${PYTHON_BIN}" /usr/bin/python3
# Name of the TensorFlow pip package to install. Options:
# tensorflow
# tensorflow-gpu
# tf-nightly
# tf-nightly-gpu
ARG TF_PACKAGE=tensorflow
# Empty installs the latest version; set e.g.
# --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
ARG TF_PACKAGE_VERSION=
# "${VAR:+==${VAR}}" adds "==<version>" only when a version was given.
RUN python3 -m pip install --no-cache-dir \
    ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
ARG DEBIAN_FRONTEND="noninteractive"
# Install OpenMPI (runtime and development files) and the OpenSSH client and
# server. The apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libopenmpi-dev \
        openmpi-bin \
        openmpi-common \
        openssh-client \
        openssh-server && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default. The
# wrapper exec's the real binary so signals sent to "mpirun" reach it directly
# and no intermediate bash process is left running.
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
    chmod a+x /usr/bin/mpirun
# Configure OpenMPI with good defaults: exclude the loopback and docker0
# interfaces from the TCP transport.
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate
# between containers.
RUN mkdir -p /var/run/sshd
# Allow OpenSSH to talk to containers without asking for confirmation: drop
# any existing StrictHostKeyChecking line and append an explicit "no".
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Install Horovod
# These build args are visible as environment variables to the RUN steps
# below, where Horovod's build reads them: TensorFlow support on, PyTorch and
# MXNet support off.
ARG HOROVOD_WITHOUT_PYTORCH=1
ARG HOROVOD_WITHOUT_MXNET=1
ARG HOROVOD_WITH_TENSORFLOW=1
# Empty installs the latest release; set --build-arg HOROVOD_VERSION=<version> to pin one.
ARG HOROVOD_VERSION=
# Compiler toolchain and the headers of the selected interpreter. The apt
# lists are removed in the same layer so they do not stay in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        g++-8 \
        gcc-8 \
        ${PYTHON}-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Register gcc/g++ 9 and 8 as alternatives; gcc-8 has the higher priority and
# so becomes the default compiler.
# NOTE(review): update-alternatives requires /usr/bin/gcc-9 to exist, so this
# assumes a base release whose build-essential provides gcc-9 -- confirm
# against the UBUNTU_VERSION actually used for the build.
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}
# Install the bashrc from the build context as the system-wide bash config
# and open its permissions to every user.
COPY bashrc /etc/bash.bashrc
RUN chmod ugo+rwx /etc/bash.bashrc
# Jupyter and matplotlib for notebook use.
RUN python3 -m pip install --no-cache-dir \
    jupyter \
    matplotlib
# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422
RUN python3 -m pip install --no-cache-dir \
    jupyter_http_over_ws \
    ipykernel==5.1.1 \
    nbformat==4.4.0
RUN jupyter serverextension enable --py jupyter_http_over_ws
# World-writable notebook directory and /.local (presumably so the image also
# works when run as an arbitrary non-root user -- confirm).
RUN mkdir -p /tf/ && chmod -R ugo+rwx /tf/
RUN mkdir /.local && chmod ugo+rwx /.local
WORKDIR /tf
EXPOSE 8888
RUN python3 -m ipykernel.kernelspec
# Source the bashrc, then serve notebooks from /tf on all interfaces.
CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"]

View File

@ -0,0 +1,108 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.
# Ubuntu release used as the base image; override with --build-arg UBUNTU_VERSION=<version>.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS base
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Interpreter to install and use as the default, e.g. --build-arg PYTHON=python3.7
ARG PYTHON=python3
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        curl \
        software-properties-common
# Add the deadsnakes PPA, refresh the package index and install the
# interpreter in a single layer, so the install never runs against a stale
# cached index. The apt lists are removed afterwards.
RUN add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends --fix-missing \
        ${PYTHON} && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Bootstrap pip with the interpreter selected by PYTHON (previously a
# hard-coded python3.7). The script is downloaded to a file first so that a
# failed download fails the build instead of being hidden by a pipe.
RUN curl -fSsL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py && \
    ${PYTHON} /tmp/get-pip.py && \
    rm -f /tmp/get-pip.py
RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \
    pip \
    setuptools
# Some TF tools expect a "python" binary
# The interpreter path is resolved once, before any link exists: /usr/local/bin
# normally precedes /usr/bin on PATH, so calling `which` again after the first
# links are created can return one of the new links and, with PYTHON=python3,
# turn /usr/bin/python3 into a symlink loop.
RUN PYTHON_BIN="$(readlink -f "$(which ${PYTHON})")" && \
    ln -sf "${PYTHON_BIN}" /usr/local/bin/python && \
    ln -sf "${PYTHON_BIN}" /usr/local/bin/python3 && \
    ln -sf "${PYTHON_BIN}" /usr/bin/python && \
    ln -sf "${PYTHON_BIN}" /usr/bin/python3
# Name of the TensorFlow pip package to install. Options:
# tensorflow
# tensorflow-gpu
# tf-nightly
# tf-nightly-gpu
ARG TF_PACKAGE=tensorflow
# Empty installs the latest version; set e.g.
# --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
ARG TF_PACKAGE_VERSION=
# "${VAR:+==${VAR}}" adds "==<version>" only when a version was given.
RUN python3 -m pip install --no-cache-dir \
    ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
ARG DEBIAN_FRONTEND="noninteractive"
# Install OpenMPI (runtime and development files) and the OpenSSH client and
# server. The apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libopenmpi-dev \
        openmpi-bin \
        openmpi-common \
        openssh-client \
        openssh-server && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default. The
# wrapper exec's the real binary so signals sent to "mpirun" reach it directly
# and no intermediate bash process is left running.
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
    chmod a+x /usr/bin/mpirun
# Configure OpenMPI with good defaults: exclude the loopback and docker0
# interfaces from the TCP transport.
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate
# between containers.
RUN mkdir -p /var/run/sshd
# Allow OpenSSH to talk to containers without asking for confirmation: drop
# any existing StrictHostKeyChecking line and append an explicit "no".
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Install Horovod
# These build args are visible as environment variables to the RUN steps
# below, where Horovod's build reads them: TensorFlow support on, PyTorch and
# MXNet support off.
ARG HOROVOD_WITHOUT_PYTORCH=1
ARG HOROVOD_WITHOUT_MXNET=1
ARG HOROVOD_WITH_TENSORFLOW=1
# Empty installs the latest release; set --build-arg HOROVOD_VERSION=<version> to pin one.
ARG HOROVOD_VERSION=
# Compiler toolchain and the headers of the selected interpreter. The apt
# lists are removed in the same layer so they do not stay in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        g++-8 \
        gcc-8 \
        ${PYTHON}-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Register gcc/g++ 9 and 8 as alternatives; gcc-8 has the higher priority and
# so becomes the default compiler.
# NOTE(review): update-alternatives requires /usr/bin/gcc-9 to exist, so this
# assumes a base release whose build-essential provides gcc-9 -- confirm
# against the UBUNTU_VERSION actually used for the build.
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}
# Install the bashrc from the build context as the system-wide bash config.
COPY bashrc /etc/bash.bashrc
RUN chmod a+rwx /etc/bash.bashrc

View File

@ -1,3 +0,0 @@
# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1
ARG CHECKOUT_HOROVOD_SRC=0
# An explicit "if" is used instead of "test && clone || true" so that a failed
# clone fails the build rather than producing an image without /horovod_src.
RUN if [ "${CHECKOUT_HOROVOD_SRC}" -eq 1 ]; then \
        git clone --recursive https://github.com/uber/horovod.git /horovod_src; \
    fi

View File

@ -1,3 +0,0 @@
# Install Horovod at a pinned release; override with
# --build-arg HOROVOD_VERSION=<version>.
ARG HOROVOD_VERSION=0.16.4
RUN python3 -m pip install --no-cache-dir \
    horovod==${HOROVOD_VERSION}

View File

@ -1,47 +0,0 @@
# Install libnuma, OpenSSH and wget. apt-get is tried first and yum second;
# the build aborts when neither succeeds.
RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libnuma-dev \
        openssh-server \
        openssh-client \
        wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* ) || \
    ( yum -y update && yum -y install \
        numactl-devel \
        openssh-server \
        openssh-clients \
        wget && \
    yum clean all ) || \
    ( echo "Unsupported Linux distribution. Aborting!" && exit 1 )
# Install Open MPI
# Download a release version from the official website, as the openmpi GitHub
# master is not always stable.
# NOTE(review): OPENMPI_VERSION and OPENMPI_DOWNLOAD_URL both hard-code 4.0.0
# and must be overridden together.
ARG OPENMPI_VERSION=openmpi-4.0.0
ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz
# Build and install from source in one layer, then remove the build tree.
RUN mkdir /tmp/openmpi && \
    cd /tmp/openmpi && \
    wget ${OPENMPI_DOWNLOAD_URL} && \
    tar zxf ${OPENMPI_VERSION}.tar.gz && \
    cd ${OPENMPI_VERSION} && \
    ./configure --enable-orterun-prefix-by-default && \
    make -j $(nproc) all && \
    make install && \
    ldconfig && \
    rm -rf /tmp/openmpi
# Create a wrapper for OpenMPI to allow running as root by default. The
# wrapper exec's the real binary so signals sent to "mpirun" reach it directly
# and no intermediate bash process is left running.
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/local/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
    chmod a+x /usr/local/bin/mpirun
# Configure OpenMPI with good defaults: exclude the loopback and docker0
# interfaces from the TCP transport.
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate
# between containers.
RUN mkdir -p /var/run/sshd
# Allow OpenSSH to talk to containers without asking for confirmation: drop
# any existing StrictHostKeyChecking line and append an explicit "no".
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config

View File

@ -0,0 +1,21 @@
# Install Horovod
# These build args are visible as environment variables to the RUN steps
# below, where Horovod's build reads them: TensorFlow support on, PyTorch and
# MXNet support off.
ARG HOROVOD_WITHOUT_PYTORCH=1
ARG HOROVOD_WITHOUT_MXNET=1
ARG HOROVOD_WITH_TENSORFLOW=1
# Empty installs the latest release; set --build-arg HOROVOD_VERSION=<version> to pin one.
ARG HOROVOD_VERSION=
# software-properties-common provides add-apt-repository.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        software-properties-common
# The toolchain PPA is the source of gcc-8/g++-8 here; -y keeps the step
# non-interactive.
RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test
# Compiler toolchain and Python headers. The apt lists are removed in the same
# layer so they do not stay in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        g++-8 \
        gcc-8 \
        python3-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Register gcc/g++ 5 and 8 as alternatives; gcc-8 has the higher priority and
# so becomes the default compiler.
# NOTE(review): update-alternatives requires /usr/bin/gcc-5 to exist, so this
# assumes a base release whose build-essential provides gcc-5 -- confirm.
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 500 --slave /usr/bin/g++ g++ /usr/bin/g++-5 && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}

View File

@ -0,0 +1,16 @@
# Install Horovod
# These build args are visible as environment variables to the RUN steps
# below, where Horovod's build reads them: TensorFlow support on, PyTorch and
# MXNet support off.
ARG HOROVOD_WITHOUT_PYTORCH=1
ARG HOROVOD_WITHOUT_MXNET=1
ARG HOROVOD_WITH_TENSORFLOW=1
# Empty installs the latest release; set --build-arg HOROVOD_VERSION=<version> to pin one.
ARG HOROVOD_VERSION=
# Compiler toolchain and the headers of the selected interpreter. The apt
# lists are removed in the same layer so they do not stay in the image.
# NOTE(review): PYTHON is not declared in this partial; it is presumably
# provided by an ARG PYTHON earlier in the assembled Dockerfile -- confirm.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        g++-8 \
        gcc-8 \
        ${PYTHON}-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Register gcc/g++ 9 and 8 as alternatives; gcc-8 has the higher priority and
# so becomes the default compiler.
# NOTE(review): update-alternatives requires /usr/bin/gcc-9 to exist, so this
# assumes a base release whose build-essential provides gcc-9 -- confirm.
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}

View File

@ -0,0 +1,4 @@
# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1
ARG CHECKOUT_HOROVOD_SRC=0
# Branch (or tag) of the Horovod repository to clone.
ARG HOROVOD_BRANCH=master
# An explicit "if" is used instead of "test && clone || true" so that a failed
# clone fails the build rather than producing an image without /horovod_src.
RUN if [ "${CHECKOUT_HOROVOD_SRC}" -eq 1 ]; then \
        git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src; \
    fi

View File

@ -0,0 +1,16 @@
# Install Horovod
# These build args are visible as environment variables to the RUN steps
# below, where Horovod's build reads them: TensorFlow support on, PyTorch and
# MXNet support off.
ARG HOROVOD_WITHOUT_PYTORCH=1
ARG HOROVOD_WITHOUT_MXNET=1
ARG HOROVOD_WITH_TENSORFLOW=1
# Empty installs the latest release; set --build-arg HOROVOD_VERSION=<version> to pin one.
ARG HOROVOD_VERSION=
# Compiler toolchain and Python headers. The apt lists are removed in the same
# layer so they do not stay in the image.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        g++-8 \
        gcc-8 \
        python3-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Register gcc/g++ 7 and 8 as alternatives; gcc-8 has the higher priority and
# so becomes the default compiler.
# NOTE(review): update-alternatives requires /usr/bin/gcc-7 to exist, so this
# assumes a base release whose build-essential provides gcc-7 -- confirm.
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 700 --slave /usr/bin/g++ g++ /usr/bin/g++-7 && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8
RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}

View File

@ -0,0 +1,28 @@
ARG DEBIAN_FRONTEND="noninteractive"
# Install OpenMPI (runtime and development files) and the OpenSSH client and
# server. The apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libopenmpi-dev \
        openmpi-bin \
        openmpi-common \
        openssh-client \
        openssh-server && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Create a wrapper for OpenMPI to allow running as root by default. The
# wrapper exec's the real binary so signals sent to "mpirun" reach it directly
# and no intermediate bash process is left running.
RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \
    chmod a+x /usr/bin/mpirun
# Configure OpenMPI with good defaults: exclude the loopback and docker0
# interfaces from the TCP transport.
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf
# Create /var/run/sshd for the OpenSSH server that MPI uses to communicate
# between containers.
RUN mkdir -p /var/run/sshd
# Allow OpenSSH to talk to containers without asking for confirmation: drop
# any existing StrictHostKeyChecking line and append an explicit "no".
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config

View File

@ -32,7 +32,6 @@ releases:
tag_specs:
- "{nightly}{jupyter}"
- "{_TAG_PREFIX}{ubuntu-devel}"
# Built per-release and pushed to tensorflow/tensorflow
# --arg _TAG_PREFIX=<val> should be set to "1.11" (for example) or "latest".
versioned:
@ -44,6 +43,10 @@ releases:
- "{_TAG_PREFIX}{ubuntu-onednn}{onednn-jupyter}"
- "{_TAG_PREFIX}{ubuntu-devel-onednn}"
- "{_TAG_PREFIX}{ubuntu-devel-onednn}{onednn-jupyter}"
# oneDNN + MPI + Horovod images (runtime and devel), each also combined with
# the onednn-jupyter slice set
- "{_TAG_PREFIX}{ubuntu-onednn-mpi-horovod}"
- "{_TAG_PREFIX}{ubuntu-onednn-mpi-horovod}{onednn-jupyter}"
- "{_TAG_PREFIX}{ubuntu-devel-onednn-mpi-horovod}"
- "{_TAG_PREFIX}{ubuntu-devel-onednn-mpi-horovod}{onednn-jupyter}"
# Dockerfiles stored in the TF repo; not pushed anywhere
dockerfiles:
@ -54,12 +57,14 @@ releases:
- "{ubuntu-devel}{jupyter}"
- "{ubuntu-ppc64le}{jupyter}"
- "{ubuntu-devel-ppc64le}{jupyter}"
- "{ubuntu-horovod}{jupyter}"
- "{ubuntu-devel-horovod}{jupyter}"
- "{ubuntu-onednn}"
- "{ubuntu-onednn}{onednn-jupyter}"
- "{ubuntu-devel-onednn}"
- "{ubuntu-devel-onednn}{onednn-jupyter}"
# Same four oneDNN + MPI + Horovod combinations as in the versioned release
- "{ubuntu-onednn-mpi-horovod}"
- "{ubuntu-devel-onednn-mpi-horovod}"
- "{ubuntu-onednn-mpi-horovod}{onednn-jupyter}"
- "{ubuntu-devel-onednn-mpi-horovod}{onednn-jupyter}"
- "{ubuntu-devel-arm64v8}{jupyter}"
slice_sets:
@ -142,40 +147,209 @@ slice_sets:
- UBUNTU_VERSION=18.04
- CHECKOUT_TF_SRC=1
# NOTE(review): the entries below mix keys of the ubuntu-horovod /
# ubuntu-devel-horovod slice sets (mkl_horovod partials and tests) with keys of
# the ubuntu-onednn slice set (onednn partials, import-onednn.sh). This looks
# like removed and added diff lines rendered without +/- markers - confirm
# against the raw diff before editing.
# The ubuntu-onednn variants visible here target Ubuntu 16.04, 18.04 and 20.04,
# all with TF_PACKAGE=intel-tensorflow; the 20.04 variant uses the python3
# partial and PYTHON=python3.7.
ubuntu-horovod:
- add_to_name: "-horovod"
dockerfile_exclusive_name: "horovod"
dockerfile_subdirectory: "mkl_horovod"
ubuntu-onednn:
- add_to_name: "-16.04-onednn"
dockerfile_exclusive_name: "ubuntu-16.04-onednn"
dockerfile_subdirectory: "onednn"
partials:
- ubuntu/version
- ubuntu/cpu
- ubuntu/python
- onednn/ubuntu/version
- onednn/ubuntu/cpu
- onednn/ubuntu/python
- tensorflow
- mkl_horovod/mpi
- mkl_horovod/horovod
- shell
tests:
- import-mkl-horovod.sh
- import-onednn.sh
args:
- TF_PACKAGE=intel-tensorflow
ubuntu-devel-horovod:
- add_to_name: "devel-horovod"
dockerfile_exclusive_name: "devel-horovod"
dockerfile_subdirectory: "mkl_horovod"
- UBUNTU_VERSION=16.04
- add_to_name: "-18.04-onednn"
dockerfile_exclusive_name: "ubuntu-18.04-onednn"
dockerfile_subdirectory: "onednn"
partials:
- ubuntu/version
- ubuntu/devel-cpu
- ubuntu/python
- ubuntu/bazel
- mkl_horovod/mpi
- mkl_horovod/devel-horovod
- onednn/ubuntu/version
- onednn/ubuntu/cpu
- onednn/ubuntu/python
- tensorflow
- shell
tests:
- build-mkl-horovod.sh
- import-onednn.sh
args:
- TF_PACKAGE=intel-tensorflow
- UBUNTU_VERSION=18.04
- add_to_name: "-20.04-onednn"
dockerfile_exclusive_name: "ubuntu-20.04-onednn"
dockerfile_subdirectory: "onednn"
partials:
- onednn/ubuntu/version
- onednn/ubuntu/cpu
- onednn/ubuntu/python3
- tensorflow
- shell
tests:
- import-onednn.sh
args:
- TF_PACKAGE=intel-tensorflow
- UBUNTU_VERSION=20.04
- PYTHON=python3.7
# oneDNN development images, one variant per Ubuntu release (16.04, 18.04, 20.04).
# Each variant is assembled from the onednn/ubuntu version, devel, python and
# bazel partials plus the shell partial, and sets CHECKOUT_TF_SRC=1 with
# TF_BRANCH=master. The only test entry of every variant is an empty string.
ubuntu-devel-onednn:
- add_to_name: "-16.04-devel-onednn"
dockerfile_exclusive_name: "ubuntu-16.04-devel-onednn"
dockerfile_subdirectory: "onednn"
partials:
- onednn/ubuntu/version
- onednn/ubuntu/devel
- onednn/ubuntu/python
- onednn/ubuntu/bazel
- shell
tests:
- ""
args:
- UBUNTU_VERSION=16.04
- CHECKOUT_TF_SRC=1
- TF_BRANCH=master
- add_to_name: "-18.04-devel-onednn"
dockerfile_exclusive_name: "ubuntu-18.04-devel-onednn"
dockerfile_subdirectory: "onednn"
partials:
- onednn/ubuntu/version
- onednn/ubuntu/devel
- onednn/ubuntu/python
- onednn/ubuntu/bazel
- shell
tests:
- ""
args:
- UBUNTU_VERSION=18.04
- CHECKOUT_TF_SRC=1
- TF_BRANCH=master
# The 20.04 variant swaps in the python3 partial and sets PYTHON=python3.7
- add_to_name: "-20.04-devel-onednn"
dockerfile_exclusive_name: "ubuntu-20.04-devel-onednn"
dockerfile_subdirectory: "onednn"
partials:
- onednn/ubuntu/version
- onednn/ubuntu/devel
- onednn/ubuntu/python3
- onednn/ubuntu/bazel
- shell
tests:
- ""
args:
- UBUNTU_VERSION=20.04
- PYTHON=python3.7
- CHECKOUT_TF_SRC=1
- TF_BRANCH=master
# oneDNN runtime images with MPI and Horovod, one variant per Ubuntu release.
# Partials: onednn version/cpu/python, tensorflow, the onednn mpi partial and a
# release-specific Horovod partial (1604-horovod, horovod, 2004-horovod), then
# shell. Every variant runs import-onednn-horovod.sh and sets
# TF_PACKAGE=intel-tensorflow.
# NOTE(review): the DEBIAN_FRONTEND arg value is written with embedded double
# quotes; whether the quotes reach the build arg literally depends on how the
# consumer of this spec parses args - confirm.
ubuntu-onednn-mpi-horovod:
- add_to_name: "-16.04-onednn-mpi-horovod"
dockerfile_exclusive_name: "ubuntu-16.04-onednn-mpi-horovod"
dockerfile_subdirectory: "onednn"
partials:
- onednn/ubuntu/version
- onednn/ubuntu/cpu
- onednn/ubuntu/python
- tensorflow
- onednn/ubuntu/mpi
- onednn/ubuntu/1604-horovod
- shell
tests:
- import-onednn-horovod.sh
args:
- UBUNTU_VERSION=16.04
- DEBIAN_FRONTEND="noninteractive"
- TF_PACKAGE=intel-tensorflow
- add_to_name: "-18.04-onednn-mpi-horovod"
dockerfile_exclusive_name: "ubuntu-18.04-onednn-mpi-horovod"
dockerfile_subdirectory: "onednn"
partials:
- onednn/ubuntu/version
- onednn/ubuntu/cpu
- onednn/ubuntu/python
- tensorflow
- onednn/ubuntu/mpi
- onednn/ubuntu/horovod
- shell
tests:
- import-onednn-horovod.sh
args:
- UBUNTU_VERSION=18.04
- DEBIAN_FRONTEND="noninteractive"
- TF_PACKAGE=intel-tensorflow
# The 20.04 variant swaps in the python3 partial and sets PYTHON=python3.7
- add_to_name: "-20.04-onednn-mpi-horovod"
dockerfile_exclusive_name: "ubuntu-20.04-onednn-mpi-horovod"
dockerfile_subdirectory: "onednn"
partials:
- onednn/ubuntu/version
- onednn/ubuntu/cpu
- onednn/ubuntu/python3
- tensorflow
- onednn/ubuntu/mpi
- onednn/ubuntu/2004-horovod
- shell
tests:
- import-onednn-horovod.sh
args:
- UBUNTU_VERSION=20.04
- PYTHON=python3.7
- DEBIAN_FRONTEND="noninteractive"
- TF_PACKAGE=intel-tensorflow
# oneDNN development images with MPI and Horovod sources, one variant per Ubuntu
# release. Partials: onednn version/devel/python/bazel, the onednn mpi partial,
# devel-horovod, then shell. Every variant sets CHECKOUT_TF_SRC=1,
# CHECKOUT_HOROVOD_SRC=1 and HOROVOD_BRANCH=master; the only test entry is an
# empty string.
# NOTE(review): add_to_name orders the parts as "onednn-devel" while
# dockerfile_exclusive_name (and the ubuntu-devel-onednn slice set) use
# "devel-onednn" - confirm the resulting tag names are intended.
ubuntu-devel-onednn-mpi-horovod:
- add_to_name: "-16.04-onednn-devel-mpi-horovod"
dockerfile_exclusive_name: "ubuntu-16.04-devel-onednn-mpi-horovod"
dockerfile_subdirectory: "onednn"
partials:
- onednn/ubuntu/version
- onednn/ubuntu/devel
- onednn/ubuntu/python
- onednn/ubuntu/bazel
- onednn/ubuntu/mpi
- onednn/ubuntu/devel-horovod
- shell
tests:
- ""
args:
- UBUNTU_VERSION=16.04
- CHECKOUT_TF_SRC=1
- CHECKOUT_HOROVOD_SRC=1
- HOROVOD_BRANCH=master
- add_to_name: "-18.04-onednn-devel-mpi-horovod"
dockerfile_exclusive_name: "ubuntu-18.04-devel-onednn-mpi-horovod"
dockerfile_subdirectory: "onednn"
partials:
- onednn/ubuntu/version
- onednn/ubuntu/devel
- onednn/ubuntu/python
- onednn/ubuntu/bazel
- onednn/ubuntu/mpi
- onednn/ubuntu/devel-horovod
- shell
tests:
- ""
args:
- UBUNTU_VERSION=18.04
- CHECKOUT_TF_SRC=1
- CHECKOUT_HOROVOD_SRC=1
- HOROVOD_BRANCH=master
# The 20.04 variant swaps in the python3 partial and sets PYTHON=python3.7
- add_to_name: "-20.04-onednn-devel-mpi-horovod"
dockerfile_exclusive_name: "ubuntu-20.04-devel-onednn-mpi-horovod"
dockerfile_subdirectory: "onednn"
partials:
- onednn/ubuntu/version
- onednn/ubuntu/devel
- onednn/ubuntu/python3
- onednn/ubuntu/bazel
- onednn/ubuntu/mpi
- onednn/ubuntu/devel-horovod
- shell
tests:
- ""
args:
- UBUNTU_VERSION=20.04
- PYTHON=python3.7
- CHECKOUT_TF_SRC=1
- CHECKOUT_HOROVOD_SRC=1
- HOROVOD_BRANCH=master
ubuntu-onednn:

View File

@ -15,4 +15,12 @@
# limitations under the License.
# ============================================================================
# Verify that TensorFlow reports MKL as enabled and that Horovod's TensorFlow
# bindings can be imported.
#
# IsMklEnabled() is looked up in tensorflow.python._pywrap_util_port first and,
# if that attempt fails, in tensorflow.python.pywrap_tensorflow.
#
# The previous form wrapped python in `echo \`...\``, which discards python's
# exit status, so the first branch always printed PASS. Here python's own exit
# status drives the result, and a disabled MKL (IsMklEnabled() is false) counts
# as a failure instead of merely being printed.

# check_mkl_horovod MODULE
#   MODULE: name of the tensorflow.python submodule that provides IsMklEnabled().
#   Prints the value returned by IsMklEnabled(); returns 0 only when it is true
#   and `import horovod.tensorflow` succeeds.
check_mkl_horovod() {
  python -c "import sys; from tensorflow.python import $1 as mkl_mod; import horovod.tensorflow as hvd; enabled = mkl_mod.IsMklEnabled(); print(enabled); sys.exit(0 if enabled else 1)"
}

if check_mkl_horovod _pywrap_util_port; then
  echo "PASS: Horovod with MKL is enabled"
elif check_mkl_horovod pywrap_tensorflow; then
  echo "PASS: Horovod with Old MKL is detected"
else
  # `die` is not defined anywhere in the visible script, so fail explicitly.
  echo "FAIL: Horovod with MKL is not enabled" >&2
  exit 1
fi