Add Dockerfile partials to support Mkl + MPI + Horovod; Remove trailing whitespace from python.partial.Dockerfile
This commit is contained in:
parent
eae5d0bc15
commit
3023584591
tensorflow/tools/dockerfiles
@ -0,0 +1,3 @@
|
||||
# Check out the Horovod source tree into /horovod_src when the image is built
# with --build-arg CHECKOUT_HOROVOD_SRC=1; otherwise this step does nothing.
ARG CHECKOUT_HOROVOD_SRC=0
# An explicit `if` (rather than `test ... && git clone ... || true`) still
# makes the step a no-op when the flag is not 1, but lets a failed clone fail
# the build instead of silently producing an image without the sources that
# were asked for.
RUN if test "${CHECKOUT_HOROVOD_SRC}" -eq 1; then \
        git clone --recursive https://github.com/uber/horovod.git /horovod_src; \
    fi
|
@ -0,0 +1,2 @@
|
||||
# Install the Horovod package with pip, with pip's cache disabled
RUN ${PIP} --no-cache-dir install horovod
|
44
tensorflow/tools/dockerfiles/partials/mpi.partial.Dockerfile
Normal file
44
tensorflow/tools/dockerfiles/partials/mpi.partial.Dockerfile
Normal file
@ -0,0 +1,44 @@
|
||||
# Install libnuma, OpenSSH (server and client) and wget with whichever package
# manager the base image provides: apt-get is tried first, then yum.
#
# Each alternative is wrapped in its own subshell. In sh, `&&` and `||` have
# equal precedence and associate left to right, so without the grouping a
# successful apt-get chain would continue into `yum -y install`, and any
# successful chain would end up running the trailing `exit 1`.
RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        libnuma-dev \
        openssh-server \
        openssh-client \
        wget && \
      apt-get clean && \
      rm -rf /var/lib/apt/lists/* ) || \
    ( yum -y update && yum -y install \
        numactl-devel \
        openssh-server \
        openssh-clients \
        wget && \
      yum clean all ) || \
    ( echo "Unsupported Linux distribution. Aborting!" && exit 1 )
|
||||
|
||||
# Build Open MPI 4.0.0 from the upstream source tarball and install it.
# The scratch directory under /tmp is removed again in the same layer.
RUN mkdir /tmp/openmpi \
 && cd /tmp/openmpi \
 && wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz \
 && tar -xzf openmpi-4.0.0.tar.gz \
 && cd openmpi-4.0.0 \
 && ./configure --enable-orterun-prefix-by-default \
 && make -j "$(nproc)" all \
 && make install \
 && ldconfig \
 && rm -rf /tmp/openmpi
|
||||
|
||||
# Replace mpirun with a two-line bash wrapper that always passes
# --allow-run-as-root to the original binary, which is kept as mpirun.real.
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
 && printf '%s\n' \
        '#!/bin/bash' \
        'mpirun.real --allow-run-as-root "$@"' \
        > /usr/local/bin/mpirun \
 && chmod a+x /usr/local/bin/mpirun
|
||||
|
||||
# Configure Open MPI with good defaults: append a btl_tcp_if_exclude setting
# naming the lo and docker0 interfaces to the system-wide MCA parameter file.
RUN printf '%s\n' "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
|
||||
|
||||
# Create the /var/run/sshd directory for the OpenSSH server, which MPI uses to communicate between containers
|
||||
RUN mkdir -p /var/run/sshd
|
||||
|
||||
# Let the SSH client reach other containers without a host-key confirmation
# prompt: copy ssh_config minus any StrictHostKeyChecking lines, append
# "StrictHostKeyChecking no", then move the rewritten file into place.
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new \
 && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
 && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
|
@ -15,4 +15,4 @@ RUN ${PIP} --no-cache-dir install --upgrade \
|
||||
setuptools
|
||||
|
||||
# Some TF tools expect a "python" binary
|
||||
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
|
||||
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
|
||||
|
@ -1,5 +1,5 @@
|
||||
header: |
|
||||
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -83,6 +83,21 @@ slice_sets:
|
||||
- ubuntu/python
|
||||
- tensorflow
|
||||
- shell
|
||||
- add_to_name: "-horovod"
|
||||
dockerfile_exclusive_name: "horovod"
|
||||
dockerfile_subdirectory: "mkl"
|
||||
partials:
|
||||
- ubuntu/version
|
||||
- ubuntu/cpu
|
||||
- ubuntu/python
|
||||
- tensorflow
|
||||
- mpi
|
||||
- horovod
|
||||
- shell
|
||||
tests:
|
||||
- import-mkl-horovod.sh
|
||||
args:
|
||||
- TF_PACKAGE=intel-tensorflow
|
||||
- add_to_name: "-gpu"
|
||||
dockerfile_exclusive_name: "gpu"
|
||||
args:
|
||||
@ -110,6 +125,22 @@ slice_sets:
|
||||
- build-cpu.sh
|
||||
args:
|
||||
- CHECKOUT_TF_SRC=1
|
||||
- add_to_name: "devel-horovod"
|
||||
dockerfile_exclusive_name: "devel-horovod"
|
||||
dockerfile_subdirectory: "mkl"
|
||||
partials:
|
||||
- ubuntu/version
|
||||
- ubuntu/devel-cpu
|
||||
- ubuntu/python
|
||||
- ubuntu/bazel
|
||||
- mpi
|
||||
- devel-horovod
|
||||
- shell
|
||||
tests:
|
||||
- build-mkl-horovod.sh
|
||||
args:
|
||||
- CHECKOUT_TF_SRC=1
|
||||
- CHECKOUT_HOROVOD_SRC=1
|
||||
- add_to_name: "devel-gpu"
|
||||
dockerfile_exclusive_name: "devel-gpu"
|
||||
partials:
|
||||
|
46
tensorflow/tools/dockerfiles/tests/build-mkl-horovod.sh
Executable file
46
tensorflow/tools/dockerfiles/tests/build-mkl-horovod.sh
Executable file
@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
|
||||
|
||||
# Download and build TensorFlow with Intel(R) MKL-DNN support, then build and
# install Horovod on top of it. Any failing step aborts the script.
set -euxo pipefail
git clone --branch=master --depth=1 https://github.com/tensorflow/tensorflow.git /tensorflow
cd /tensorflow

# -f keeps this step from failing when /usr/local/bin/python already exists.
ln -sf "$(which "${PYTHON}")" /usr/local/bin/python

# Build TensorFlow with support for Intel(R) MKL-DNN.
#
# The steps are separate statements rather than one `&&` chain: errexit is
# ignored for every command of an AND list except the last, so a failure in
# the middle of a chain would not stop the script.
#
# `yes` is terminated by a broken pipe once configure.py stops reading, and
# pipefail would report that as a failure of the whole pipeline. Suspend
# pipefail for this one pipeline so only configure.py's exit status counts.
set +o pipefail
yes "" | ${PYTHON} configure.py
set -o pipefail
bazel build -c opt --config=mkl --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
    tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip
pip --no-cache-dir install --upgrade /tmp/pip/tensorflow-*.whl
rm -rf /tmp/pip
rm -rf /root/.cache

# Download and build Horovod (cloned into the current directory).
git clone --recursive https://github.com/uber/horovod.git
cd horovod
# Require the TensorFlow bindings and skip the PyTorch ones.
export HOROVOD_WITHOUT_PYTORCH=1
export HOROVOD_WITH_TENSORFLOW=1
# `setup.py sdist` writes the source archive to dist/ by default.
python setup.py sdist
pip --no-cache-dir install --upgrade dist/horovod*.tar.gz
rm -rf dist
rm -rf /root/.cache
|
18
tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh
Executable file
18
tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
# Exit with status 1 unless TensorFlow reports that MKL is enabled, then check
# that the Horovod TensorFlow module can be imported.
python -c '
from tensorflow.python import pywrap_tensorflow
if not pywrap_tensorflow.IsMklEnabled():
    exit(1)
import horovod.tensorflow as hvd
'
|
Loading…
Reference in New Issue
Block a user