Add Dockerfile partials to support Mkl + MPI + Horovod; Remove trailing whitespace from python.partial.Dockerfile
This commit is contained in:
parent
eae5d0bc15
commit
3023584591
tensorflow/tools/dockerfiles
@ -0,0 +1,3 @@
|
|||||||
|
# Check out the Horovod source code into /horovod_src when the image is built
# with --build-arg CHECKOUT_HOROVOD_SRC=1. Any other value leaves this layer a
# no-op.
ARG CHECKOUT_HOROVOD_SRC=0
# An explicit `if` is used instead of `test ... && git clone ... || true`:
# the trailing `|| true` was only meant to keep the build going when the
# checkout is not requested, but it also hid a failed `git clone`, yielding an
# image that silently lacked /horovod_src.
RUN if test "${CHECKOUT_HOROVOD_SRC}" -eq 1; then \
        git clone --recursive https://github.com/uber/horovod.git /horovod_src; \
    fi
|
@ -0,0 +1,2 @@
|
|||||||
|
# Install Horovod from PyPI.
# HOROVOD_VERSION is optional: when left empty (the default) the newest release
# is installed, exactly as before; pass e.g. --build-arg HOROVOD_VERSION=x.y.z
# to pin a release for reproducible images.
ARG HOROVOD_VERSION=""
# ${VAR:+word} expands to nothing when HOROVOD_VERSION is empty, so the
# requirement is either "horovod" or "horovod==<version>".
RUN ${PIP} install --no-cache-dir "horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}}"
|
44
tensorflow/tools/dockerfiles/partials/mpi.partial.Dockerfile
Normal file
44
tensorflow/tools/dockerfiles/partials/mpi.partial.Dockerfile
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
# Install libnuma, OpenSSH (server and client) and wget with whichever package
# manager the base image offers: apt-get first, then yum, otherwise abort.
#
# Each alternative is wrapped in its own subshell. In the shell, `&&` and `||`
# have equal precedence and associate to the left, so the ungrouped form
# `apt ... || yum update && yum install ... || echo ... && exit 1` still ran
# `yum -y install` after a successful apt-get branch, which then failed and
# fell through to the "Unsupported Linux distribution" exit.
RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \
          libnuma-dev \
          openssh-server \
          openssh-client \
          wget && \
      apt-get clean && \
      rm -rf /var/lib/apt/lists/* ) || \
    ( yum -y update && yum -y install \
          numactl-devel \
          openssh-server \
          openssh-clients \
          wget && \
      yum clean all ) || \
    ( echo "Unsupported Linux distribution. Aborting!" && exit 1 )
|
||||||
|
|
||||||
|
# Build and install Open MPI from the upstream source tarball.
# OPENMPI_VERSION defaults to the previously hard-coded 4.0.0 and can be
# overridden with --build-arg. The download directory on open-mpi.org is named
# after the major.minor series, which ${OPENMPI_VERSION%.*} derives by dropping
# the last dot-separated component (4.0.0 -> 4.0).
ARG OPENMPI_VERSION=4.0.0
# Download, build and clean-up happen in a single layer so neither the tarball
# nor the build tree is kept in the image.
RUN mkdir /tmp/openmpi && \
    cd /tmp/openmpi && \
    wget "https://www.open-mpi.org/software/ompi/v${OPENMPI_VERSION%.*}/downloads/openmpi-${OPENMPI_VERSION}.tar.gz" && \
    tar zxf "openmpi-${OPENMPI_VERSION}.tar.gz" && \
    cd "openmpi-${OPENMPI_VERSION}" && \
    ./configure --enable-orterun-prefix-by-default && \
    make -j "$(nproc)" all && \
    make install && \
    ldconfig && \
    rm -rf /tmp/openmpi
|
||||||
|
|
||||||
|
# Create a wrapper for OpenMPI to allow running as root by default.
# The original launcher is renamed to mpirun.real and a small bash script takes
# its place, forwarding every argument unchanged after --allow-run-as-root.
# The script uses `exec` so the real launcher replaces the wrapper shell:
# signals sent to `mpirun` then reach Open MPI directly instead of an
# intermediate bash process, and no extra process is left waiting.
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/local/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
    chmod a+x /usr/local/bin/mpirun
|
||||||
|
|
||||||
|
# Configure OpenMPI with good defaults: exclude the lo and docker0 interfaces from TCP communication
|
||||||
|
RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
|
||||||
|
|
||||||
|
# Create the /var/run/sshd directory for the OpenSSH server installed above, which MPI uses to communicate between containers
|
||||||
|
RUN mkdir -p /var/run/sshd
|
||||||
|
|
||||||
|
# Let OpenSSH connect to other containers without prompting for host-key
# confirmation: strip any existing StrictHostKeyChecking line from the client
# config, append "StrictHostKeyChecking no", then swap the new file into place.
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
|
@ -15,4 +15,4 @@ RUN ${PIP} --no-cache-dir install --upgrade \
|
|||||||
setuptools
|
setuptools
|
||||||
|
|
||||||
# Some TF tools expect a "python" binary
|
# Some TF tools expect a "python" binary
|
||||||
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
|
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
header: |
|
header: |
|
||||||
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
@ -83,6 +83,21 @@ slice_sets:
|
|||||||
- ubuntu/python
|
- ubuntu/python
|
||||||
- tensorflow
|
- tensorflow
|
||||||
- shell
|
- shell
|
||||||
|
- add_to_name: "-horovod"
|
||||||
|
dockerfile_exclusive_name: "horovod"
|
||||||
|
dockerfile_subdirectory: "mkl"
|
||||||
|
partials:
|
||||||
|
- ubuntu/version
|
||||||
|
- ubuntu/cpu
|
||||||
|
- ubuntu/python
|
||||||
|
- tensorflow
|
||||||
|
- mpi
|
||||||
|
- horovod
|
||||||
|
- shell
|
||||||
|
tests:
|
||||||
|
- import-mkl-horovod.sh
|
||||||
|
args:
|
||||||
|
- TF_PACKAGE=intel-tensorflow
|
||||||
- add_to_name: "-gpu"
|
- add_to_name: "-gpu"
|
||||||
dockerfile_exclusive_name: "gpu"
|
dockerfile_exclusive_name: "gpu"
|
||||||
args:
|
args:
|
||||||
@ -110,6 +125,22 @@ slice_sets:
|
|||||||
- build-cpu.sh
|
- build-cpu.sh
|
||||||
args:
|
args:
|
||||||
- CHECKOUT_TF_SRC=1
|
- CHECKOUT_TF_SRC=1
|
||||||
|
- add_to_name: "devel-horovod"
|
||||||
|
dockerfile_exclusive_name: "devel-horovod"
|
||||||
|
dockerfile_subdirectory: "mkl"
|
||||||
|
partials:
|
||||||
|
- ubuntu/version
|
||||||
|
- ubuntu/devel-cpu
|
||||||
|
- ubuntu/python
|
||||||
|
- ubuntu/bazel
|
||||||
|
- mpi
|
||||||
|
- devel-horovod
|
||||||
|
- shell
|
||||||
|
tests:
|
||||||
|
- build-mkl-horovod.sh
|
||||||
|
args:
|
||||||
|
- CHECKOUT_TF_SRC=1
|
||||||
|
- CHECKOUT_HOROVOD_SRC=1
|
||||||
- add_to_name: "devel-gpu"
|
- add_to_name: "devel-gpu"
|
||||||
dockerfile_exclusive_name: "devel-gpu"
|
dockerfile_exclusive_name: "devel-gpu"
|
||||||
partials:
|
partials:
|
||||||
|
46
tensorflow/tools/dockerfiles/tests/build-mkl-horovod.sh
Executable file
46
tensorflow/tools/dockerfiles/tests/build-mkl-horovod.sh
Executable file
@ -0,0 +1,46 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Download and build TensorFlow.
# -e/-u/-o pipefail abort on the first failing command, unset variable or
# failing pipeline stage; -x traces every command in the test log.
set -euxo pipefail
git clone --branch=master --depth=1 https://github.com/tensorflow/tensorflow.git /tensorflow
cd /tensorflow

# -f keeps this step from failing when /usr/local/bin/python already exists.
ln -sf "$(which "${PYTHON}")" /usr/local/bin/python

# Build TensorFlow with support for Intel(R) MKL-DNN.
# The default answers are fed to configure.py through process substitution
# rather than `yes "" | ...`: with `pipefail`, `yes` being terminated by
# SIGPIPE once configure.py exits makes the pipeline report failure, which
# short-circuited the following `&&` chain and skipped the build without
# tripping `set -e`.
${PYTHON} configure.py < <(yes "")
# Each step is its own statement so that `set -e` stops the script as soon as
# one of them fails.
bazel build -c opt --config=mkl --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
    tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip
pip --no-cache-dir install --upgrade /tmp/pip/tensorflow-*.whl
rm -rf /tmp/pip
rm -rf /root/.cache

# Download and build Horovod.
git clone --recursive https://github.com/uber/horovod.git
cd horovod
# Build Horovod with TensorFlow support and without PyTorch support.
export HOROVOD_WITHOUT_PYTORCH=1
export HOROVOD_WITH_TENSORFLOW=1
python setup.py sdist
# `setup.py sdist` writes the source archive to dist/ by default, not sdist/.
pip --no-cache-dir install --upgrade dist/horovod*.tar.gz
rm -rf dist
rm -rf /root/.cache
|
18
tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh
Executable file
18
tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh
Executable file
@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Smoke test: exit with status 1 unless pywrap_tensorflow.IsMklEnabled() is
# truthy, then check that horovod.tensorflow can be imported.
python -c 'from tensorflow.python import pywrap_tensorflow; pywrap_tensorflow.IsMklEnabled() or exit(1); import horovod.tensorflow as hvd'
|
Loading…
Reference in New Issue
Block a user