Merge pull request #45133 from Intel-tensorflow:ganand1/master_icx_launch
PiperOrigin-RevId: 350040548 Change-Id: I7bc616fb7d1e01e073b3d4da3977be2e5523f71f
This commit is contained in:
commit
df7465d3f1
tensorflow/tools/ci_build/linux/mkl
@ -16,10 +16,14 @@ ARG ENABLE_SECURE_BUILD
|
||||
ARG BAZEL_VERSION=""
|
||||
ARG ENABLE_DNNL1=""
|
||||
ARG ENABLE_HOROVOD=""
|
||||
ARG ENABLE_GCC8=""
|
||||
ARG OPENMPI_VERSION=""
|
||||
ARG OPENMPI_DOWNLOAD_URL=""
|
||||
ARG HOROVOD_VERSION=""
|
||||
ARG INSTALL_HOROVOD_FROM_COMMIT=""
|
||||
ARG BUILD_SSH=""
|
||||
ARG TF_NIGHTLY_FLAG=""
|
||||
ARG RELEASE_CONTAINER=""
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
@ -31,6 +35,15 @@ RUN if [ "${BAZEL_VERSION}" != "" ]; then \
|
||||
rm -rf bazel-$BAZEL_VERSION-installer-linux-x86_64.sh; \
|
||||
fi
|
||||
|
||||
# Install gcc-8/g++-8 and register them as the preferred compilers (over gcc-7) when ENABLE_GCC8 is "yes"
|
||||
RUN if [ "${ENABLE_GCC8}" = "yes" ]; then \
|
||||
add-apt-repository ppa:ubuntu-toolchain-r/test -y && \
|
||||
apt-get update && \
|
||||
apt-get install gcc-8 g++-8 -y && \
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 80 --slave /usr/bin/g++ g++ /usr/bin/g++-8 --slave /usr/bin/gcov gcov /usr/bin/gcov-8 && \
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 70 --slave /usr/bin/g++ g++ /usr/bin/g++-7 --slave /usr/bin/gcov gcov /usr/bin/gcov-7 ;\
|
||||
fi
|
||||
|
||||
# Download and build TensorFlow from the latest sources found in the root container
|
||||
# make sure that if they pass in a tag, that it is loaded or we'll get an error
|
||||
WORKDIR /
|
||||
@ -66,10 +79,17 @@ RUN bazel --bazelrc=/root/.bazelrc build -c opt \
|
||||
COPY install_openmpi_horovod.sh .
|
||||
RUN if [ "${ENABLE_HOROVOD}" = "yes" ]; then \
|
||||
chmod +x install_openmpi_horovod.sh && \
|
||||
OPENMPI_VERSION=${OPENMPI_VERSION} OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL} HOROVOD_VERSION=${HOROVOD_VERSION} ./install_openmpi_horovod.sh && \
|
||||
OPENMPI_VERSION=${OPENMPI_VERSION} OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL} BUILD_SSH=${BUILD_SSH} \
|
||||
INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT} HOROVOD_VERSION=${HOROVOD_VERSION} ./install_openmpi_horovod.sh && \
|
||||
rm -rf install_openmpi_horovod.sh; \
|
||||
fi
|
||||
|
||||
# Remove crypto python packages for software compliance check.
|
||||
RUN if [ "${RELEASE_CONTAINER}" = "yes" ]; then \
|
||||
${PIP} uninstall --yes cryptography && \
|
||||
rm -rf /usr/lib/python3/dist-packages/pycrypto-2.6.1.egg-info; \
|
||||
fi
|
||||
|
||||
# TensorBoard
|
||||
EXPOSE 6006
|
||||
# IPython
|
||||
|
@ -57,6 +57,8 @@ BUILD_AVX_CONTAINERS=${BUILD_AVX_CONTAINERS:-no}
|
||||
BUILD_AVX2_CONTAINERS=${BUILD_AVX2_CONTAINERS:-no}
|
||||
BUILD_SKX_CONTAINERS=${BUILD_SKX_CONTAINERS:-no}
|
||||
BUILD_CLX_CONTAINERS=${BUILD_CLX_CONTAINERS:-no}
|
||||
BUILD_ICX_CLIENT_CONTAINERS=${BUILD_ICX_CLIENT_CONTAINERS:-no}
|
||||
BUILD_ICX_SERVER_CONTAINERS=${BUILD_ICX_SERVER_CONTAINERS:-no}
|
||||
CONTAINER_PORT=${TF_DOCKER_BUILD_PORT:-8888}
|
||||
BUILD_TF_V2_CONTAINERS=${BUILD_TF_V2_CONTAINERS:-yes}
|
||||
BUILD_TF_BFLOAT16_CONTAINERS=${BUILD_TF_BFLOAT16_CONTAINERS:-no}
|
||||
@ -65,10 +67,14 @@ BAZEL_VERSION=${BAZEL_VERSION}
|
||||
BUILD_PY2_CONTAINERS=${BUILD_PY2_CONTAINERS:-no}
|
||||
ENABLE_DNNL1=${ENABLE_DNNL1:-no}
|
||||
ENABLE_HOROVOD=${ENABLE_HOROVOD:-no}
|
||||
INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT:-no}
|
||||
ENABLE_GCC8=${ENABLE_GCC8:-no}
|
||||
OPENMPI_VERSION=${OPENMPI_VERSION}
|
||||
OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL}
|
||||
HOROVOD_VERSION=${HOROVOD_VERSION}
|
||||
BUILD_SSH=${BUILD_SSH:-no}
|
||||
IS_NIGHTLY=${IS_NIGHTLY:-no}
|
||||
RELEASE_CONTAINER=${RELEASE_CONTAINER:-no}
|
||||
|
||||
debug "ROOT_CONTAINER=${ROOT_CONTAINER}"
|
||||
debug "TF_ROOT_CONTAINER_TAG=${TF_ROOT_CONTAINER_TAG}"
|
||||
@ -80,18 +86,24 @@ debug "BUILD_AVX_CONTAINERS=${BUILD_AVX_CONTAINERS}"
|
||||
debug "BUILD_AVX2_CONTAINERS=${BUILD_AVX2_CONTAINERS}"
|
||||
debug "BUILD_SKX_CONTAINERS=${BUILD_SKX_CONTAINERS}"
|
||||
debug "BUILD_CLX_CONTAINERS=${BUILD_CLX_CONTAINERS}"
|
||||
debug "BUILD_ICX_CLIENT_CONTAINERS=${BUILD_ICX_CLIENT_CONTAINERS}"
|
||||
debug "BUILD_ICX_SERVER_CONTAINERS=${BUILD_ICX_SERVER_CONTAINERS}"
|
||||
debug "BUILD_TF_V2_CONTAINERS=${BUILD_TF_V2_CONTAINERS}"
|
||||
debug "BUILD_TF_BFLOAT16_CONTAINERS=${BUILD_TF_BFLOAT16_CONTAINERS}"
|
||||
debug "ENABLE_SECURE_BUILD=${ENABLE_SECURE_BUILD}"
|
||||
debug "TMP_DIR=${TMP_DIR}"
|
||||
debug "BAZEL_VERSION=${BAZEL_VERSION}"
|
||||
debug "ENABLE_GCC8=${ENABLE_GCC8}"
|
||||
debug "BUILD_PY2_CONTAINERS=${BUILD_PY2_CONTAINERS}"
|
||||
debug "ENABLE_DNNL1=${ENABLE_DNNL1}"
|
||||
debug "ENABLE_HOROVOD=${ENABLE_HOROVOD}"
|
||||
debug "INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT}"
|
||||
debug "OPENMPI_VERSION=${OPENMPI_VERSION}"
|
||||
debug "OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL}"
|
||||
debug "HOROVOD_VERSION=${HOROVOD_VERSION}"
|
||||
debug "BUILD_SSH=${BUILD_SSH}"
|
||||
debug "IS_NIGHTLY=${IS_NIGHTLY}"
|
||||
debug "RELEASE_CONTAINER=${RELEASE_CONTAINER}"
|
||||
|
||||
function build_container()
|
||||
{
|
||||
@ -147,6 +159,8 @@ function build_container()
|
||||
TF_DOCKER_BUILD_ARGS+=("--build-arg OPENMPI_VERSION=${OPENMPI_VERSION}")
|
||||
TF_DOCKER_BUILD_ARGS+=("--build-arg OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL}")
|
||||
TF_DOCKER_BUILD_ARGS+=("--build-arg HOROVOD_VERSION=${HOROVOD_VERSION}")
|
||||
TF_DOCKER_BUILD_ARGS+=("--build-arg INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT}")
|
||||
TF_DOCKER_BUILD_ARGS+=("--build-arg BUILD_SSH=${BUILD_SSH}")
|
||||
fi
|
||||
|
||||
# Add build arg --nightly_flag for the nightly build
|
||||
@ -154,6 +168,11 @@ function build_container()
|
||||
TF_DOCKER_BUILD_ARGS+=("--build-arg TF_NIGHTLY_FLAG=--nightly_flag")
|
||||
fi
|
||||
|
||||
# Forward ENABLE_GCC8 as a build arg so the Dockerfile can decide whether to install gcc-8
|
||||
TF_DOCKER_BUILD_ARGS+=("--build-arg ENABLE_GCC8=${ENABLE_GCC8}")
|
||||
|
||||
TF_DOCKER_BUILD_ARGS+=("--build-arg RELEASE_CONTAINER=${RELEASE_CONTAINER}")
|
||||
|
||||
# Perform docker build
|
||||
debug "Building docker image with image name and tag: ${TEMP_IMAGE_NAME}"
|
||||
CMD="${DOCKER_BINARY} build ${TF_DOCKER_BUILD_ARGS[@]} --no-cache --pull -t ${TEMP_IMAGE_NAME} -f Dockerfile.devel-mkl ."
|
||||
@ -305,6 +324,14 @@ if [[ ${BUILD_CLX_CONTAINERS} == "yes" ]]; then
|
||||
PLATFORMS+=("icelake")
|
||||
fi
|
||||
|
||||
if [[ ${BUILD_ICX_CLIENT_CONTAINERS} == "yes" ]]; then
|
||||
PLATFORMS+=("icelake-client")
|
||||
fi
|
||||
|
||||
if [[ ${BUILD_ICX_SERVER_CONTAINERS} == "yes" ]]; then
|
||||
PLATFORMS+=("icelake-server")
|
||||
fi
|
||||
|
||||
# Checking out sources needs to be done only once
|
||||
checkout_tensorflow "${TF_REPO}" "${TF_BUILD_VERSION}" "${TF_BUILD_VERSION_IS_PR}"
|
||||
|
||||
@ -330,6 +357,14 @@ do
|
||||
FINAL_TAG="${FINAL_TAG}-avx512-VNNI"
|
||||
fi
|
||||
|
||||
if [[ ${PLATFORM} == "icelake-client" ]]; then
|
||||
FINAL_TAG="${FINAL_TAG}-icx-client"
|
||||
fi
|
||||
|
||||
if [[ ${PLATFORM} == "icelake-server" ]]; then
|
||||
FINAL_TAG="${FINAL_TAG}-icx-server"
|
||||
fi
|
||||
|
||||
# Add -devel-mkl to the image tag
|
||||
FINAL_TAG="${FINAL_TAG}-devel-mkl"
|
||||
if [[ "${PYTHON}" == "python3" ]]; then
|
||||
|
@ -22,7 +22,10 @@ set -e
|
||||
# Set default
|
||||
OPENMPI_VERSION=${OPENMPI_VERSION:-openmpi-2.1.1}
|
||||
OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL:-https://www.open-mpi.org/software/ompi/v2.1/downloads/openmpi-2.1.1.tar.gz}
|
||||
INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT:-no}
|
||||
BUILD_SSH=${BUILD_SSH:-no}
|
||||
HOROVOD_VERSION=${HOROVOD_VERSION:-0.19.1}
|
||||
SSH_CONFIG_PATH=/etc/ssh/ssh_config
|
||||
|
||||
# Install Open MPI
|
||||
echo "Installing OpenMPI version ${OPENMPI_VERSION} ..."
|
||||
@ -54,27 +57,49 @@ echo 'OpenMPI version:'
|
||||
mpirun --version
|
||||
|
||||
# Install OpenSSH for MPI to communicate between containers
|
||||
apt-get clean && apt-get update && \
|
||||
apt-get install -y --no-install-recommends --fix-missing \
|
||||
openssh-client openssh-server libnuma-dev && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
if [[ $? == "0" ]]; then
|
||||
echo "PASS: OpenSSH installation"
|
||||
if [[ ${BUILD_SSH} == "yes" ]]; then
|
||||
mkdir /tmp/buildssh
|
||||
cd /tmp/buildssh && curl -fSsL -O http://www.zlib.net/zlib-1.2.11.tar.gz && tar -xzvf zlib-1.2.11.tar.gz && \
|
||||
cd /tmp/buildssh/zlib-1.2.11 && ./configure && make && make install
|
||||
cd /tmp/buildssh && curl -fSsL -O https://www.openssl.org/source/openssl-1.1.1.tar.gz && tar -xzvf openssl-1.1.1.tar.gz && \
|
||||
cd /tmp/buildssh/openssl-1.1.1 && ./config && make && make test && make install
|
||||
cd /tmp/buildssh && curl -fSsL -O https://mirrors.sonic.net/pub/OpenBSD/OpenSSH/portable/openssh-8.4p1.tar.gz && \
|
||||
tar -xzvf openssh-8.4p1.tar.gz && cd /tmp/buildssh/openssh-8.4p1 && \
|
||||
./configure --with-md5-passwords && make && \
|
||||
groupadd sshd && useradd -M -g sshd -c 'sshd privsep' -d /var/empty -s /sbin/nologin sshd && passwd -l sshd && \
|
||||
make install
|
||||
apt-get clean && apt-get update && \
|
||||
apt-get install -y --no-install-recommends --fix-missing \
|
||||
libnuma-dev cmake
|
||||
SSH_CONFIG_PATH=/usr/local/etc/ssh_config
|
||||
else
|
||||
yum -y update && yum -y install numactl-devel openssh-server openssh-clients && \
|
||||
yum clean all
|
||||
if [[ $? == "0" ]]; then
|
||||
echo "PASS: OpenSSH installation"
|
||||
else
|
||||
echo "Unsupported Linux distribution. Aborting!" && exit 1
|
||||
fi
|
||||
apt-get clean && apt-get update && \
|
||||
apt-get install -y --no-install-recommends --fix-missing \
|
||||
openssh-client openssh-server libnuma-dev cmake && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
if [[ $? == "0" ]]; then
|
||||
echo "PASS: OpenSSH installation"
|
||||
else
|
||||
yum -y update && yum -y install numactl-devel openssh-server openssh-clients cmake && \
|
||||
yum clean all
|
||||
if [[ $? == "0" ]]; then
|
||||
echo "PASS: OpenSSH installation"
|
||||
else
|
||||
echo "Unsupported Linux distribution. Aborting!" && exit 1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
mkdir -p /var/run/sshd
|
||||
grep -v StrictHostKeyChecking ${SSH_CONFIG_PATH} > /etc/ssh/ssh_config.new
|
||||
# Allow OpenSSH to talk to containers without asking for confirmation
|
||||
grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new
|
||||
echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new
|
||||
mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
|
||||
mv /etc/ssh/ssh_config.new ${SSH_CONFIG_PATH}
|
||||
|
||||
# Install Horovod
|
||||
HOROVOD_WITH_TENSORFLOW=1
|
||||
python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION}
|
||||
if [[ ${INSTALL_HOROVOD_FROM_COMMIT} == "yes" ]]; then
|
||||
HOROVOD_WITH_TENSORFLOW=1
|
||||
python3 -m pip install --no-cache-dir git+https://github.com/horovod/horovod.git@${HOROVOD_VERSION}
|
||||
else
|
||||
HOROVOD_WITH_TENSORFLOW=1
|
||||
python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION}
|
||||
fi
|
||||
|
@ -180,6 +180,46 @@ class CascadelakePlatform(IntelPlatform):
|
||||
CASCADELAKE_ARCH_NEW + " "
|
||||
|
||||
|
||||
class IcelakeClientPlatform(IntelPlatform):
  """Bazel/GCC flag provider for the Icelake client target.

  The base class is initialised with (8, 4); presumably this is the minimum
  GCC major/minor version for the platform -- confirm against IntelPlatform.
  """

  def __init__(self):
    IntelPlatform.__init__(self, 8, 4)

  def get_bazel_gcc_flags(self):
    """Return the space-terminated bazel flag string for this platform.

    When the base class reports that old architecture names must be used,
    the result is the "skylake-avx512" arch flag followed by one feature
    flag per AVX-512 extension; otherwise it is the single
    "icelake-client" arch flag.
    """
    if not IntelPlatform.use_old_arch_names(self, 8, 4):
      # The native architecture name can be used directly.
      return self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + "icelake-client" + " "

    # Fall back to the older arch name plus explicit AVX-512 feature flags.
    pieces = [self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + "skylake-avx512" + " "]
    for feature in ("avx512f", "avx512cd"):
      pieces.append(self.BAZEL_PREFIX_ + self.FLAG_PREFIX_ + feature + " ")
    return "".join(pieces)
|
||||
|
||||
|
||||
class IcelakeServerPlatform(IntelPlatform):
  """Bazel/GCC flag provider for the Icelake server target.

  The base class is initialised with (8, 4); presumably this is the minimum
  GCC major/minor version for the platform -- confirm against IntelPlatform.
  """

  def __init__(self):
    IntelPlatform.__init__(self, 8, 4)

  def get_bazel_gcc_flags(self):
    """Return the space-terminated bazel flag string for this platform.

    When the base class reports that old architecture names must be used,
    the result is the "skylake-avx512" arch flag followed by one feature
    flag per AVX-512 extension; otherwise it is the single
    "icelake-server" arch flag.
    """
    if not IntelPlatform.use_old_arch_names(self, 8, 4):
      # The native architecture name can be used directly.
      return self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + "icelake-server" + " "

    # Fall back to the older arch name plus explicit AVX-512 feature flags.
    pieces = [self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + "skylake-avx512" + " "]
    for feature in ("avx512f", "avx512cd"):
      pieces.append(self.BAZEL_PREFIX_ + self.FLAG_PREFIX_ + feature + " ")
    return "".join(pieces)
|
||||
|
||||
|
||||
class BuildEnvSetter(object):
|
||||
"""Prepares the proper environment settings for various Intel platforms."""
|
||||
default_platform_ = "haswell"
|
||||
@ -189,7 +229,9 @@ class BuildEnvSetter(object):
|
||||
"sandybridge": SandyBridgePlatform(),
|
||||
"haswell": HaswellPlatform(),
|
||||
"skylake": SkylakePlatform(),
|
||||
"cascadelake": CascadelakePlatform()
|
||||
"cascadelake": CascadelakePlatform(),
|
||||
"icelake-client": IcelakeClientPlatform(),
|
||||
"icelake-server": IcelakeServerPlatform(),
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
|
Loading…
Reference in New Issue
Block a user