From e8d60b9d7ac5aabef46ff4983caa3714625fcfb9 Mon Sep 17 00:00:00 2001 From: ganand1 <gandhimathi.anand@intel.com> Date: Fri, 6 Nov 2020 15:21:42 -0800 Subject: [PATCH 1/3] ICX changes. --- .../ci_build/linux/mkl/Dockerfile.devel-mkl | 22 ++++++- .../ci_build/linux/mkl/build-dev-container.sh | 39 +++++++++++ .../linux/mkl/install_openmpi_horovod.sh | 65 +++++++++++++------ .../tools/ci_build/linux/mkl/set-build-env.py | 41 +++++++++++- 4 files changed, 146 insertions(+), 21 deletions(-) diff --git a/tensorflow/tools/ci_build/linux/mkl/Dockerfile.devel-mkl b/tensorflow/tools/ci_build/linux/mkl/Dockerfile.devel-mkl index 80091e55a17..9f465694d3b 100755 --- a/tensorflow/tools/ci_build/linux/mkl/Dockerfile.devel-mkl +++ b/tensorflow/tools/ci_build/linux/mkl/Dockerfile.devel-mkl @@ -16,10 +16,14 @@ ARG ENABLE_SECURE_BUILD ARG BAZEL_VERSION="" ARG ENABLE_DNNL1="" ARG ENABLE_HOROVOD="" +ARG ENABLE_GCC8="" ARG OPENMPI_VERSION="" ARG OPENMPI_DOWNLOAD_URL="" ARG HOROVOD_VERSION="" +ARG INSTALL_HOROVOD_FROM_COMMIT="" +ARG BUILD_SSH="" ARG TF_NIGHTLY_FLAG="" +ARG RELEASE_CONTAINER="" ENV DEBIAN_FRONTEND=noninteractive @@ -31,6 +35,15 @@ RUN if [ "${BAZEL_VERSION}" != "" ]; then \ rm -rf bazel-$BAZEL_VERSION-installer-linux-x86_64.sh; \ fi +# Upgrade gcc-8 if argument is passed +RUN if [ "${ENABLE_GCC8}" = "yes" ]; then \ + add-apt-repository ppa:ubuntu-toolchain-r/test -y && \ + apt-get update && \ + apt-get install gcc-8 g++-8 -y && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 80 --slave /usr/bin/g++ g++ /usr/bin/g++-8 --slave /usr/bin/gcov gcov /usr/bin/gcov-8 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 70 --slave /usr/bin/g++ g++ /usr/bin/g++-7 --slave /usr/bin/gcov gcov /usr/bin/gcov-7 ;\ + fi + # Download and build TensorFlow from the latest sources found in the root container # make sure that if they pass in a tag, that it is loaded or we'll get an error WORKDIR / @@ -66,10 +79,17 @@ RUN bazel --bazelrc=/root/.bazelrc build -c opt \ COPY install_openmpi_horovod.sh . RUN if [ "${ENABLE_HOROVOD}" = "yes" ]; then \ chmod +x install_openmpi_horovod.sh && \ - OPENMPI_VERSION=${OPENMPI_VERSION} OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL} HOROVOD_VERSION=${HOROVOD_VERSION} ./install_openmpi_horovod.sh && \ + OPENMPI_VERSION=${OPENMPI_VERSION} OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL} BUILD_SSH=${BUILD_SSH} \ + INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT} HOROVOD_VERSION=${HOROVOD_VERSION} ./install_openmpi_horovod.sh && \ rm -rf install_openmpi_horovod.sh; \ fi +# Remove crypto python packages due to SWLC +RUN if [ "${RELEASE_CONTAINER}" = "yes" ]; then \ + ${PIP} uninstall --yes cryptography && \ + rm -rf /usr/lib/python3/dist-packages/pycrypto-2.6.1.egg-info; \ + fi + # TensorBoard EXPOSE 6006 # IPython diff --git a/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh b/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh index a0880b0e51c..3f8e9e69a13 100755 --- a/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh +++ b/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh @@ -57,6 +57,8 @@ BUILD_AVX_CONTAINERS=${BUILD_AVX_CONTAINERS:-no} BUILD_AVX2_CONTAINERS=${BUILD_AVX2_CONTAINERS:-no} BUILD_SKX_CONTAINERS=${BUILD_SKX_CONTAINERS:-no} BUILD_CLX_CONTAINERS=${BUILD_CLX_CONTAINERS:-no} +BUILD_ICX_CLIENT_CONTAINERS=${BUILD_ICX_CLIENT_CONTAINERS:-no} +BUILD_ICX_SERVER_CONTAINERS=${BUILD_ICX_SERVER_CONTAINERS:-no} CONTAINER_PORT=${TF_DOCKER_BUILD_PORT:-8888} BUILD_TF_V2_CONTAINERS=${BUILD_TF_V2_CONTAINERS:-yes} BUILD_TF_BFLOAT16_CONTAINERS=${BUILD_TF_BFLOAT16_CONTAINERS:-no} @@ -65,10 +67,14 @@ BAZEL_VERSION=${BAZEL_VERSION} BUILD_PY2_CONTAINERS=${BUILD_PY2_CONTAINERS:-no} ENABLE_DNNL1=${ENABLE_DNNL1:-no} ENABLE_HOROVOD=${ENABLE_HOROVOD:-no} +INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT:-no} +ENABLE_GCC8=${ENABLE_GCC8:-no} OPENMPI_VERSION=${OPENMPI_VERSION} OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL} HOROVOD_VERSION=${HOROVOD_VERSION} +BUILD_SSH=${BUILD_SSH:-no} IS_NIGHTLY=${IS_NIGHTLY:-no} +RELEASE_CONTAINER=${RELEASE_CONTAINER:-no} debug "ROOT_CONTAINER=${ROOT_CONTAINER}" debug "TF_ROOT_CONTAINER_TAG=${TF_ROOT_CONTAINER_TAG}" @@ -80,18 +86,24 @@ debug "BUILD_AVX_CONTAINERS=${BUILD_AVX_CONTAINERS}" debug "BUILD_AVX2_CONTAINERS=${BUILD_AVX2_CONTAINERS}" debug "BUILD_SKX_CONTAINERS=${BUILD_SKX_CONTAINERS}" debug "BUILD_CLX_CONTAINERS=${BUILD_CLX_CONTAINERS}" +debug "BUILD_ICX_CLIENT_CONTAINERS=${BUILD_ICX_CLIENT_CONTAINERS}" +debug "BUILD_ICX_SERVER_CONTAINERS=${BUILD_ICX_SERVER_CONTAINERS}" debug "BUILD_TF_V2_CONTAINERS=${BUILD_TF_V2_CONTAINERS}" debug "BUILD_TF_BFLOAT16_CONTAINERS=${BUILD_TF_BFLOAT16_CONTAINERS}" debug "ENABLE_SECURE_BUILD=${ENABLE_SECURE_BUILD}" debug "TMP_DIR=${TMP_DIR}" debug "BAZEL_VERSION=${BAZEL_VERSION}" +debug "ENABLE_GCC8=${ENABLE_GCC8}" debug "BUILD_PY2_CONTAINERS=${BUILD_PY2_CONTAINERS}" debug "ENABLE_DNNL1=${ENABLE_DNNL1}" debug "ENABLE_HOROVOD=${ENABLE_HOROVOD}" +debug "INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT}" debug "OPENMPI_VERSION=${OPENMPI_VERSION}" debug "OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL}" debug "HOROVOD_VERSION=${HOROVOD_VERSION}" +debug "BUILD_SSH=${BUILD_SSH}" debug "IS_NIGHTLY=${IS_NIGHTLY}" +debug "RELEASE_CONTAINER=${RELEASE_CONTAINER}" function build_container() { @@ -147,6 +159,8 @@ function build_container() TF_DOCKER_BUILD_ARGS+=("--build-arg OPENMPI_VERSION=${OPENMPI_VERSION}") TF_DOCKER_BUILD_ARGS+=("--build-arg OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL}") TF_DOCKER_BUILD_ARGS+=("--build-arg HOROVOD_VERSION=${HOROVOD_VERSION}") + TF_DOCKER_BUILD_ARGS+=("--build-arg INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT}") + TF_DOCKER_BUILD_ARGS+=("--build-arg BUILD_SSH=${BUILD_SSH}") fi # Add build arg --nightly_flag for the nightly build @@ -154,6 +168,15 @@ function build_container() TF_DOCKER_BUILD_ARGS+=("--build-arg TF_NIGHTLY_FLAG=--nightly_flag") fi + # Add build arg GCC8 install + if [[ ${ENABLE_GCC8} == "yes" ]]; then + TF_DOCKER_BUILD_ARGS+=("--build-arg ENABLE_GCC8=${ENABLE_GCC8}") + fi + + if [[ ${RELEASE_CONTAINER} == "yes" ]]; then + TF_DOCKER_BUILD_ARGS+=("--build-arg RELEASE_CONTAINER=${RELEASE_CONTAINER}") + fi + # Perform docker build debug "Building docker image with image name and tag: ${TEMP_IMAGE_NAME}" CMD="${DOCKER_BINARY} build ${TF_DOCKER_BUILD_ARGS[@]} --no-cache --pull -t ${TEMP_IMAGE_NAME} -f Dockerfile.devel-mkl ." @@ -305,6 +328,14 @@ if [[ ${BUILD_CLX_CONTAINERS} == "yes" ]]; then PLATFORMS+=("icelake") fi +if [[ ${BUILD_ICX_CLIENT_CONTAINERS} == "yes" ]]; then + PLATFORMS+=("icelake-client") +fi + +if [[ ${BUILD_ICX_SERVER_CONTAINERS} == "yes" ]]; then + PLATFORMS+=("icelake-server") +fi + # Checking out sources needs to be done only once checkout_tensorflow "${TF_REPO}" "${TF_BUILD_VERSION}" "${TF_BUILD_VERSION_IS_PR}" @@ -330,6 +361,14 @@ do FINAL_TAG="${FINAL_TAG}-avx512-VNNI" fi + if [[ ${PLATFORM} == "icelake-client" ]]; then + FINAL_TAG="${FINAL_TAG}-icx-client" + fi + + if [[ ${PLATFORM} == "icelake-server" ]]; then + FINAL_TAG="${FINAL_TAG}-icx-server" + fi + # Add -devel-mkl to the image tag FINAL_TAG="${FINAL_TAG}-devel-mkl" if [[ "${PYTHON}" == "python3" ]]; then diff --git a/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh b/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh index 9bc92ca4fef..6d795b915bf 100755 --- a/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh +++ b/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh @@ -22,6 +22,8 @@ set -e # Set default OPENMPI_VERSION=${OPENMPI_VERSION:-openmpi-2.1.1} OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL:-https://www.open-mpi.org/software/ompi/v2.1/downloads/openmpi-2.1.1.tar.gz} +INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT:-no} +BUILD_SSH=${BUILD_SSH:-no} HOROVOD_VERSION=${HOROVOD_VERSION:-0.19.1} # Install Open MPI @@ -54,27 +56,52 @@ echo 'OpenMPI version:' mpirun --version # Install OpenSSH for MPI to communicate between containers -apt-get clean && apt-get update && \ - apt-get install -y --no-install-recommends --fix-missing \ - openssh-client openssh-server libnuma-dev && \ - rm -rf /var/lib/apt/lists/* -if [[ $? == "0" ]]; then - echo "PASS: OpenSSH installation" +if [[ ${BUILD_SSH} == "yes" ]]; then + mkdir /tmp/buildssh + cd /tmp/buildssh && curl -fSsL -O http://www.zlib.net/zlib-1.2.11.tar.gz && tar -xzvf zlib-1.2.11.tar.gz && \ + cd /tmp/buildssh/zlib-1.2.11 && ./configure && make && make install + cd /tmp/buildssh && curl -fSsL -O https://www.openssl.org/source/openssl-1.1.1.tar.gz && tar -xzvf openssl-1.1.1.tar.gz && \ + cd /tmp/buildssh/openssl-1.1.1 && ./config && make && make test && make install + cd /tmp/buildssh && curl -fSsL -O https://mirrors.sonic.net/pub/OpenBSD/OpenSSH/portable/openssh-8.4p1.tar.gz && \ + tar -xzvf openssh-8.4p1.tar.gz && cd /tmp/buildssh/openssh-8.4p1 && \ + ./configure --with-md5-passwords && make && \ + groupadd sshd && useradd -M -g sshd -c 'sshd privsep' -d /var/empty -s /sbin/nologin sshd && passwd -l sshd && \ + make install + apt-get clean && apt-get update && \ + apt-get install -y --no-install-recommends --fix-missing \ + libnuma-dev cmake + grep -v StrictHostKeyChecking /usr/local/etc/ssh_config > /usr/local/etc/ssh_config.new + # Allow OpenSSH to talk to containers without asking for confirmation + echo " StrictHostKeyChecking no" >> /usr/local/etc/ssh_config.new + mv /usr/local/etc/ssh_config.new /usr/local/etc/ssh_config else - yum -y update && yum -y install numactl-devel openssh-server openssh-clients && \ - yum clean all - if [[ $? == "0" ]]; then - echo "PASS: OpenSSH installation" - else - echo "Unsupported Linux distribution. Aborting!" && exit 1 - fi + apt-get clean && apt-get update && \ + apt-get install -y --no-install-recommends --fix-missing \ + openssh-client openssh-server libnuma-dev cmake && \ + rm -rf /var/lib/apt/lists/* + if [[ $? == "0" ]]; then + echo "PASS: OpenSSH installation" + else + yum -y update && yum -y install numactl-devel openssh-server openssh-clients cmake && \ + yum clean all + if [[ $? == "0" ]]; then + echo "PASS: OpenSSH installation" + else + echo "Unsupported Linux distribution. Aborting!" && exit 1 + fi + fi + grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new + # Allow OpenSSH to talk to containers without asking for confirmation + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config fi mkdir -p /var/run/sshd -# Allow OpenSSH to talk to containers without asking for confirmation -grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new -echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new -mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config # Install Horovod -HOROVOD_WITH_TENSORFLOW=1 -python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION} +if [[ ${INSTALL_HOROVOD_FROM_COMMIT} == "yes" ]]; then + HOROVOD_WITH_TENSORFLOW=1 + python3 -m pip install --no-cache-dir git+https://github.com/horovod/horovod.git@${HOROVOD_VERSION} +else + HOROVOD_WITH_TENSORFLOW=1 + python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION} +fi diff --git a/tensorflow/tools/ci_build/linux/mkl/set-build-env.py b/tensorflow/tools/ci_build/linux/mkl/set-build-env.py index ba5176a4b7a..e070533f039 100755 --- a/tensorflow/tools/ci_build/linux/mkl/set-build-env.py +++ b/tensorflow/tools/ci_build/linux/mkl/set-build-env.py @@ -179,6 +179,43 @@ class CascadelakePlatform(IntelPlatform): return self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ CASCADELAKE_ARCH_NEW + " " +class IcelakeClientPlatform(IntelPlatform): + + def __init__(self): + IntelPlatform.__init__(self, 8, 4) + + def get_bazel_gcc_flags(self): + ICELAKE_ARCH_OLD = "skylake-avx512" + ICELAKE_ARCH_NEW = "icelake-client" + AVX512_FLAGS = ["avx512f", "avx512cd"] + if IntelPlatform.use_old_arch_names(self, 8, 4): + ret_val = self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ + ICELAKE_ARCH_OLD + " " + for flag in AVX512_FLAGS: + ret_val += self.BAZEL_PREFIX_ + self.FLAG_PREFIX_ + flag + " " + return ret_val + else: + return self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ + ICELAKE_ARCH_NEW + " " + +class IcelakeServerPlatform(IntelPlatform): + + def __init__(self): + IntelPlatform.__init__(self, 8, 4) + + def get_bazel_gcc_flags(self): + ICELAKE_ARCH_OLD = "skylake-avx512" + ICELAKE_ARCH_NEW = "icelake-server" + AVX512_FLAGS = ["avx512f", "avx512cd"] + if IntelPlatform.use_old_arch_names(self, 8, 4): + ret_val = self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ + ICELAKE_ARCH_OLD + " " + for flag in AVX512_FLAGS: + ret_val += self.BAZEL_PREFIX_ + self.FLAG_PREFIX_ + flag + " " + return ret_val + else: + return self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ + ICELAKE_ARCH_NEW + " " class BuildEnvSetter(object): """Prepares the proper environment settings for various Intel platforms.""" @@ -189,7 +226,9 @@ class BuildEnvSetter(object): "sandybridge": SandyBridgePlatform(), "haswell": HaswellPlatform(), "skylake": SkylakePlatform(), - "cascadelake": CascadelakePlatform() + "cascadelake": CascadelakePlatform(), + "icelake-client": IcelakeClientPlatform(), + "icelake-server": IcelakeServerPlatform(), } def __init__(self): From b571638c5298dbd5420565604dc82b2c26432ce7 Mon Sep 17 00:00:00 2001 From: ganand1 <gandhimathi.anand@intel.com> Date: Fri, 6 Nov 2020 15:21:42 -0800 Subject: [PATCH 2/3] [Intel-MKL] Add support for Icelake compile option Adding Support for Icelake compile options. Enabling GCC-8 needed for Icelake. --- .../ci_build/linux/mkl/Dockerfile.devel-mkl | 22 ++++++- .../ci_build/linux/mkl/build-dev-container.sh | 39 +++++++++++ .../linux/mkl/install_openmpi_horovod.sh | 65 +++++++++++++------ .../tools/ci_build/linux/mkl/set-build-env.py | 41 +++++++++++- 4 files changed, 146 insertions(+), 21 deletions(-) diff --git a/tensorflow/tools/ci_build/linux/mkl/Dockerfile.devel-mkl b/tensorflow/tools/ci_build/linux/mkl/Dockerfile.devel-mkl index 80091e55a17..9f465694d3b 100755 --- a/tensorflow/tools/ci_build/linux/mkl/Dockerfile.devel-mkl +++ b/tensorflow/tools/ci_build/linux/mkl/Dockerfile.devel-mkl @@ -16,10 +16,14 @@ ARG ENABLE_SECURE_BUILD ARG BAZEL_VERSION="" ARG ENABLE_DNNL1="" ARG ENABLE_HOROVOD="" +ARG ENABLE_GCC8="" ARG OPENMPI_VERSION="" ARG OPENMPI_DOWNLOAD_URL="" ARG HOROVOD_VERSION="" +ARG INSTALL_HOROVOD_FROM_COMMIT="" +ARG BUILD_SSH="" ARG TF_NIGHTLY_FLAG="" +ARG RELEASE_CONTAINER="" ENV DEBIAN_FRONTEND=noninteractive @@ -31,6 +35,15 @@ RUN if [ "${BAZEL_VERSION}" != "" ]; then \ rm -rf bazel-$BAZEL_VERSION-installer-linux-x86_64.sh; \ fi +# Upgrade gcc-8 if argument is passed +RUN if [ "${ENABLE_GCC8}" = "yes" ]; then \ + add-apt-repository ppa:ubuntu-toolchain-r/test -y && \ + apt-get update && \ + apt-get install gcc-8 g++-8 -y && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 80 --slave /usr/bin/g++ g++ /usr/bin/g++-8 --slave /usr/bin/gcov gcov /usr/bin/gcov-8 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 70 --slave /usr/bin/g++ g++ /usr/bin/g++-7 --slave /usr/bin/gcov gcov /usr/bin/gcov-7 ;\ + fi + # Download and build TensorFlow from the latest sources found in the root container # make sure that if they pass in a tag, that it is loaded or we'll get an error WORKDIR / @@ -66,10 +79,17 @@ RUN bazel --bazelrc=/root/.bazelrc build -c opt \ COPY install_openmpi_horovod.sh . RUN if [ "${ENABLE_HOROVOD}" = "yes" ]; then \ chmod +x install_openmpi_horovod.sh && \ - OPENMPI_VERSION=${OPENMPI_VERSION} OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL} HOROVOD_VERSION=${HOROVOD_VERSION} ./install_openmpi_horovod.sh && \ + OPENMPI_VERSION=${OPENMPI_VERSION} OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL} BUILD_SSH=${BUILD_SSH} \ + INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT} HOROVOD_VERSION=${HOROVOD_VERSION} ./install_openmpi_horovod.sh && \ rm -rf install_openmpi_horovod.sh; \ fi +# Remove crypto python packages due to SWLC +RUN if [ "${RELEASE_CONTAINER}" = "yes" ]; then \ + ${PIP} uninstall --yes cryptography && \ + rm -rf /usr/lib/python3/dist-packages/pycrypto-2.6.1.egg-info; \ + fi + # TensorBoard EXPOSE 6006 # IPython diff --git a/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh b/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh index a0880b0e51c..3f8e9e69a13 100755 --- a/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh +++ b/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh @@ -57,6 +57,8 @@ BUILD_AVX_CONTAINERS=${BUILD_AVX_CONTAINERS:-no} BUILD_AVX2_CONTAINERS=${BUILD_AVX2_CONTAINERS:-no} BUILD_SKX_CONTAINERS=${BUILD_SKX_CONTAINERS:-no} BUILD_CLX_CONTAINERS=${BUILD_CLX_CONTAINERS:-no} +BUILD_ICX_CLIENT_CONTAINERS=${BUILD_ICX_CLIENT_CONTAINERS:-no} +BUILD_ICX_SERVER_CONTAINERS=${BUILD_ICX_SERVER_CONTAINERS:-no} CONTAINER_PORT=${TF_DOCKER_BUILD_PORT:-8888} BUILD_TF_V2_CONTAINERS=${BUILD_TF_V2_CONTAINERS:-yes} BUILD_TF_BFLOAT16_CONTAINERS=${BUILD_TF_BFLOAT16_CONTAINERS:-no} @@ -65,10 +67,14 @@ BAZEL_VERSION=${BAZEL_VERSION} BUILD_PY2_CONTAINERS=${BUILD_PY2_CONTAINERS:-no} ENABLE_DNNL1=${ENABLE_DNNL1:-no} ENABLE_HOROVOD=${ENABLE_HOROVOD:-no} +INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT:-no} +ENABLE_GCC8=${ENABLE_GCC8:-no} OPENMPI_VERSION=${OPENMPI_VERSION} OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL} HOROVOD_VERSION=${HOROVOD_VERSION} +BUILD_SSH=${BUILD_SSH:-no} IS_NIGHTLY=${IS_NIGHTLY:-no} +RELEASE_CONTAINER=${RELEASE_CONTAINER:-no} debug "ROOT_CONTAINER=${ROOT_CONTAINER}" debug "TF_ROOT_CONTAINER_TAG=${TF_ROOT_CONTAINER_TAG}" @@ -80,18 +86,24 @@ debug "BUILD_AVX_CONTAINERS=${BUILD_AVX_CONTAINERS}" debug "BUILD_AVX2_CONTAINERS=${BUILD_AVX2_CONTAINERS}" debug "BUILD_SKX_CONTAINERS=${BUILD_SKX_CONTAINERS}" debug "BUILD_CLX_CONTAINERS=${BUILD_CLX_CONTAINERS}" +debug "BUILD_ICX_CLIENT_CONTAINERS=${BUILD_ICX_CLIENT_CONTAINERS}" +debug "BUILD_ICX_SERVER_CONTAINERS=${BUILD_ICX_SERVER_CONTAINERS}" debug "BUILD_TF_V2_CONTAINERS=${BUILD_TF_V2_CONTAINERS}" debug "BUILD_TF_BFLOAT16_CONTAINERS=${BUILD_TF_BFLOAT16_CONTAINERS}" debug "ENABLE_SECURE_BUILD=${ENABLE_SECURE_BUILD}" debug "TMP_DIR=${TMP_DIR}" debug "BAZEL_VERSION=${BAZEL_VERSION}" +debug "ENABLE_GCC8=${ENABLE_GCC8}" debug "BUILD_PY2_CONTAINERS=${BUILD_PY2_CONTAINERS}" debug "ENABLE_DNNL1=${ENABLE_DNNL1}" debug "ENABLE_HOROVOD=${ENABLE_HOROVOD}" +debug "INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT}" debug "OPENMPI_VERSION=${OPENMPI_VERSION}" debug "OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL}" debug "HOROVOD_VERSION=${HOROVOD_VERSION}" +debug "BUILD_SSH=${BUILD_SSH}" debug "IS_NIGHTLY=${IS_NIGHTLY}" +debug "RELEASE_CONTAINER=${RELEASE_CONTAINER}" function build_container() { @@ -147,6 +159,8 @@ function build_container() TF_DOCKER_BUILD_ARGS+=("--build-arg OPENMPI_VERSION=${OPENMPI_VERSION}") TF_DOCKER_BUILD_ARGS+=("--build-arg OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL}") TF_DOCKER_BUILD_ARGS+=("--build-arg HOROVOD_VERSION=${HOROVOD_VERSION}") + TF_DOCKER_BUILD_ARGS+=("--build-arg INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT}") + TF_DOCKER_BUILD_ARGS+=("--build-arg BUILD_SSH=${BUILD_SSH}") fi # Add build arg --nightly_flag for the nightly build @@ -154,6 +168,15 @@ function build_container() TF_DOCKER_BUILD_ARGS+=("--build-arg TF_NIGHTLY_FLAG=--nightly_flag") fi + # Add build arg GCC8 install + if [[ ${ENABLE_GCC8} == "yes" ]]; then + TF_DOCKER_BUILD_ARGS+=("--build-arg ENABLE_GCC8=${ENABLE_GCC8}") + fi + + if [[ ${RELEASE_CONTAINER} == "yes" ]]; then + TF_DOCKER_BUILD_ARGS+=("--build-arg RELEASE_CONTAINER=${RELEASE_CONTAINER}") + fi + # Perform docker build debug "Building docker image with image name and tag: ${TEMP_IMAGE_NAME}" CMD="${DOCKER_BINARY} build ${TF_DOCKER_BUILD_ARGS[@]} --no-cache --pull -t ${TEMP_IMAGE_NAME} -f Dockerfile.devel-mkl ." @@ -305,6 +328,14 @@ if [[ ${BUILD_CLX_CONTAINERS} == "yes" ]]; then PLATFORMS+=("icelake") fi +if [[ ${BUILD_ICX_CLIENT_CONTAINERS} == "yes" ]]; then + PLATFORMS+=("icelake-client") +fi + +if [[ ${BUILD_ICX_SERVER_CONTAINERS} == "yes" ]]; then + PLATFORMS+=("icelake-server") +fi + # Checking out sources needs to be done only once checkout_tensorflow "${TF_REPO}" "${TF_BUILD_VERSION}" "${TF_BUILD_VERSION_IS_PR}" @@ -330,6 +361,14 @@ do FINAL_TAG="${FINAL_TAG}-avx512-VNNI" fi + if [[ ${PLATFORM} == "icelake-client" ]]; then + FINAL_TAG="${FINAL_TAG}-icx-client" + fi + + if [[ ${PLATFORM} == "icelake-server" ]]; then + FINAL_TAG="${FINAL_TAG}-icx-server" + fi + # Add -devel-mkl to the image tag FINAL_TAG="${FINAL_TAG}-devel-mkl" if [[ "${PYTHON}" == "python3" ]]; then diff --git a/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh b/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh index 9bc92ca4fef..6d795b915bf 100755 --- a/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh +++ b/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh @@ -22,6 +22,8 @@ set -e # Set default OPENMPI_VERSION=${OPENMPI_VERSION:-openmpi-2.1.1} OPENMPI_DOWNLOAD_URL=${OPENMPI_DOWNLOAD_URL:-https://www.open-mpi.org/software/ompi/v2.1/downloads/openmpi-2.1.1.tar.gz} +INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT:-no} +BUILD_SSH=${BUILD_SSH:-no} HOROVOD_VERSION=${HOROVOD_VERSION:-0.19.1} # Install Open MPI @@ -54,27 +56,52 @@ echo 'OpenMPI version:' mpirun --version # Install OpenSSH for MPI to communicate between containers -apt-get clean && apt-get update && \ - apt-get install -y --no-install-recommends --fix-missing \ - openssh-client openssh-server libnuma-dev && \ - rm -rf /var/lib/apt/lists/* -if [[ $? == "0" ]]; then - echo "PASS: OpenSSH installation" +if [[ ${BUILD_SSH} == "yes" ]]; then + mkdir /tmp/buildssh + cd /tmp/buildssh && curl -fSsL -O http://www.zlib.net/zlib-1.2.11.tar.gz && tar -xzvf zlib-1.2.11.tar.gz && \ + cd /tmp/buildssh/zlib-1.2.11 && ./configure && make && make install + cd /tmp/buildssh && curl -fSsL -O https://www.openssl.org/source/openssl-1.1.1.tar.gz && tar -xzvf openssl-1.1.1.tar.gz && \ + cd /tmp/buildssh/openssl-1.1.1 && ./config && make && make test && make install + cd /tmp/buildssh && curl -fSsL -O https://mirrors.sonic.net/pub/OpenBSD/OpenSSH/portable/openssh-8.4p1.tar.gz && \ + tar -xzvf openssh-8.4p1.tar.gz && cd /tmp/buildssh/openssh-8.4p1 && \ + ./configure --with-md5-passwords && make && \ + groupadd sshd && useradd -M -g sshd -c 'sshd privsep' -d /var/empty -s /sbin/nologin sshd && passwd -l sshd && \ + make install + apt-get clean && apt-get update && \ + apt-get install -y --no-install-recommends --fix-missing \ + libnuma-dev cmake + grep -v StrictHostKeyChecking /usr/local/etc/ssh_config > /usr/local/etc/ssh_config.new + # Allow OpenSSH to talk to containers without asking for confirmation + echo " StrictHostKeyChecking no" >> /usr/local/etc/ssh_config.new + mv /usr/local/etc/ssh_config.new /usr/local/etc/ssh_config else - yum -y update && yum -y install numactl-devel openssh-server openssh-clients && \ - yum clean all - if [[ $? == "0" ]]; then - echo "PASS: OpenSSH installation" - else - echo "Unsupported Linux distribution. Aborting!" && exit 1 - fi + apt-get clean && apt-get update && \ + apt-get install -y --no-install-recommends --fix-missing \ + openssh-client openssh-server libnuma-dev cmake && \ + rm -rf /var/lib/apt/lists/* + if [[ $? == "0" ]]; then + echo "PASS: OpenSSH installation" + else + yum -y update && yum -y install numactl-devel openssh-server openssh-clients cmake && \ + yum clean all + if [[ $? == "0" ]]; then + echo "PASS: OpenSSH installation" + else + echo "Unsupported Linux distribution. Aborting!" && exit 1 + fi + fi + grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new + # Allow OpenSSH to talk to containers without asking for confirmation + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config fi mkdir -p /var/run/sshd -# Allow OpenSSH to talk to containers without asking for confirmation -grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new -echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new -mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config # Install Horovod -HOROVOD_WITH_TENSORFLOW=1 -python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION} +if [[ ${INSTALL_HOROVOD_FROM_COMMIT} == "yes" ]]; then + HOROVOD_WITH_TENSORFLOW=1 + python3 -m pip install --no-cache-dir git+https://github.com/horovod/horovod.git@${HOROVOD_VERSION} +else + HOROVOD_WITH_TENSORFLOW=1 + python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION} +fi diff --git a/tensorflow/tools/ci_build/linux/mkl/set-build-env.py b/tensorflow/tools/ci_build/linux/mkl/set-build-env.py index ba5176a4b7a..e070533f039 100755 --- a/tensorflow/tools/ci_build/linux/mkl/set-build-env.py +++ b/tensorflow/tools/ci_build/linux/mkl/set-build-env.py @@ -179,6 +179,43 @@ class CascadelakePlatform(IntelPlatform): return self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ CASCADELAKE_ARCH_NEW + " " +class IcelakeClientPlatform(IntelPlatform): + + def __init__(self): + IntelPlatform.__init__(self, 8, 4) + + def get_bazel_gcc_flags(self): + ICELAKE_ARCH_OLD = "skylake-avx512" + ICELAKE_ARCH_NEW = "icelake-client" + AVX512_FLAGS = ["avx512f", "avx512cd"] + if IntelPlatform.use_old_arch_names(self, 8, 4): + ret_val = self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ + ICELAKE_ARCH_OLD + " " + for flag in AVX512_FLAGS: + ret_val += self.BAZEL_PREFIX_ + self.FLAG_PREFIX_ + flag + " " + return ret_val + else: + return self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ + ICELAKE_ARCH_NEW + " " + +class IcelakeServerPlatform(IntelPlatform): + + def __init__(self): + IntelPlatform.__init__(self, 8, 4) + + def get_bazel_gcc_flags(self): + ICELAKE_ARCH_OLD = "skylake-avx512" + ICELAKE_ARCH_NEW = "icelake-server" + AVX512_FLAGS = ["avx512f", "avx512cd"] + if IntelPlatform.use_old_arch_names(self, 8, 4): + ret_val = self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ + ICELAKE_ARCH_OLD + " " + for flag in AVX512_FLAGS: + ret_val += self.BAZEL_PREFIX_ + self.FLAG_PREFIX_ + flag + " " + return ret_val + else: + return self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ + ICELAKE_ARCH_NEW + " " class BuildEnvSetter(object): """Prepares the proper environment settings for various Intel platforms.""" @@ -189,7 +226,9 @@ class BuildEnvSetter(object): "sandybridge": SandyBridgePlatform(), "haswell": HaswellPlatform(), "skylake": SkylakePlatform(), - "cascadelake": CascadelakePlatform() + "cascadelake": CascadelakePlatform(), + "icelake-client": IcelakeClientPlatform(), + "icelake-server": IcelakeServerPlatform(), } def __init__(self): From 763c2064cade2c2c1d03de829c429ce879e2f291 Mon Sep 17 00:00:00 2001 From: ganand1 <gandhimathi.anand@intel.com> Date: Thu, 31 Dec 2020 19:16:10 -0800 Subject: [PATCH 3/3] Update tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh Co-authored-by: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> --- tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh b/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh index adb59c06cd1..7ecb7985466 100755 --- a/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh +++ b/tensorflow/tools/ci_build/linux/mkl/install_openmpi_horovod.sh @@ -71,7 +71,7 @@ if [[ ${BUILD_SSH} == "yes" ]]; then apt-get clean && apt-get update && \ apt-get install -y --no-install-recommends --fix-missing \ libnuma-dev cmake - SSH_CONFIG_PATH=/usr/loca/etc/ssh_config + SSH_CONFIG_PATH=/usr/local/etc/ssh_config else apt-get clean && apt-get update && \ apt-get install -y --no-install-recommends --fix-missing \