From d0f9944e2fa3b782064143f0b291d94895dc3ff2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Sat, 25 Apr 2020 05:15:54 -0700
Subject: [PATCH] Install multiple python versions simultaneously in our remote build docker.

This allows users of this image to build against different python versions,
which is a precondition to moving release builds to use RBE and significantly
simplifies reproducibility of problems due to different python versions.

Furthermore, it gets rid of the requirement that the locally installed python
version on the user's machine must exactly match the python version installed
on the remote image, which makes it very hard to switch presubmits to newer
python versions.

This patch:
- adds a new Dockerfile called ...-multipython; we create a new Dockerfile in
  order to be able to transition with a flag flip in the build instead of
  making it necessary to flip all build configurations simultaneously
- installs all python versions we care about from source, which makes sure
  all our python versions are built the same way - if we get them from
  third-party repositories, they interfere with our system python and are
  set up slightly differently
- adds a script that installs all python dependencies of the build process
  the same way for all python versions; the old script would pin versions in
  order to prefer binary packages over source packages; nowadays pip has the
  option --prefer-binary, which achieves the same goal in a much more
  maintainable fashion
- moves the step to link python versions into the sysroot into
  build_devtoolset.sh - this is not yet optimal, as the Dockerfile decides
  which python version to provide; it will be addressed in a subsequent patch

PiperOrigin-RevId: 308407371
Change-Id: I96af4c2c33159757167b642c7b71772c6fae8873
---
 ....cuda10.1-cudnn7-ubuntu16.04-manylinux2010 |  9 --
 ...dnn7-ubuntu16.04-manylinux2010-multipython | 87 +++++++++++++++++++
 .../ci_build/devtoolset/build_devtoolset.sh   |  6 +-
 .../install/build_and_install_python.sh       | 28 ++++++
 .../install_pip_packages_by_version.sh        | 70 +++++++++++++++
 5 files changed, 190 insertions(+), 10 deletions(-)
 create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython
 create mode 100755 tensorflow/tools/ci_build/install/build_and_install_python.sh
 create mode 100755 tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh

diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010 b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010
index d2713e8805b..df4b847b6f7 100644
--- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010
+++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010
@@ -75,15 +75,6 @@ RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
 RUN python3.8 get-pip.py
 RUN python3.8 -m pip install --upgrade pip setuptools wheel
 
-
-# TODO(klimek): Figure out a better way to get the right include paths
-# forwarded when we install new packages.
-RUN ln -s "/usr/include/x86_64-linux-gnu/python3.6m" "/dt7/usr/include/x86_64-linux-gnu/python3.6m"
-RUN ln -s "/usr/include/x86_64-linux-gnu/python3.6m" "/dt8/usr/include/x86_64-linux-gnu/python3.6m"
-
-RUN ln -s "/usr/include/x86_64-linux-gnu/python3.8" "/dt7/usr/include/x86_64-linux-gnu/python3.8"
-RUN ln -s "/usr/include/x86_64-linux-gnu/python3.8" "/dt8/usr/include/x86_64-linux-gnu/python3.8"
-
 # Make apt work with python 3.6.
RUN cp /usr/lib/python3/dist-packages/apt_pkg.cpython-35m-x86_64-linux-gnu.so \ /usr/lib/python3/dist-packages/apt_pkg.so diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython new file mode 100644 index 00000000000..c14eadcada4 --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython @@ -0,0 +1,87 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython \ +# --tag "gcr.io/tensorflow-testing/nosla-cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython" . +# $ docker push gcr.io/tensorflow-testing/nosla-cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython + +FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu16.04 as devtoolset + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + rpm2cpio \ + unar \ + wget \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-7 in /dt7. +RUN /build_devtoolset.sh devtoolset-7 /dt7 +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu16.04 +COPY --from=devtoolset /dt7 /dt7 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. 
+RUN apt-get update && apt-get install -y \
+      libnvinfer-dev=6.0.1-1+cuda10.1 \
+      libnvinfer6=6.0.1-1+cuda10.1 \
+      libnvinfer-plugin-dev=6.0.1-1+cuda10.1 \
+      libnvinfer-plugin6=6.0.1-1+cuda10.1 \
+      && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy and run the install scripts.
+ARG DEBIAN_FRONTEND=noninteractive
+
+COPY install/install_bootstrap_deb_packages.sh /install/
+RUN /install/install_bootstrap_deb_packages.sh
+
+COPY install/install_deb_packages.sh /install/
+RUN /install/install_deb_packages.sh
+
+# Install additional dependencies to build Python from source.
+RUN apt-get update && apt-get install -y \
+      libncurses5-dev \
+      libgdbm-dev \
+      libnss3-dev \
+      libreadline-dev \
+      libffi-dev \
+      && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY install/install_bazel.sh /install/
+RUN /install/install_bazel.sh
+
+COPY install/build_and_install_python.sh /install/
+RUN /install/build_and_install_python.sh "2.7.17" "--enable-unicode=ucs4"
+RUN /install/build_and_install_python.sh "3.5.9"
+RUN /install/build_and_install_python.sh "3.6.9"
+RUN /install/build_and_install_python.sh "3.7.7"
+RUN /install/build_and_install_python.sh "3.8.2"
+# NOTE(review): install order is unsorted; presumably the last run (pip3.7) owns the unversioned /usr/local/bin scripts - confirm before reordering.
+COPY install/install_pip_packages_by_version.sh /install/
+RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip2.7"
+RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8"
+RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.5"
+RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.6"
+RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7"
+
+ENV CLANG_VERSION="r42cab985fd95ba4f3f290e7bb26b93805edb447d"
+COPY install/install_latest_clang.sh /install/
+RUN /install/install_latest_clang.sh
diff --git a/tensorflow/tools/ci_build/devtoolset/build_devtoolset.sh b/tensorflow/tools/ci_build/devtoolset/build_devtoolset.sh
index 4343438f205..1afb14dd160 100755
--- a/tensorflow/tools/ci_build/devtoolset/build_devtoolset.sh
+++ b/tensorflow/tools/ci_build/devtoolset/build_devtoolset.sh
@@ -132,4 +132,8 @@ cp "./x86_64-pc-linux-gnu/libstdc++-v3/src/.libs/libstdc++_nonshared44.a" \
 # TODO(klimek): Automate linking in all non-gcc / non-kernel include
 # directories.
 mkdir -p "/${TARGET}/usr/include/x86_64-linux-gnu"
-ln -s "/usr/include/x86_64-linux-gnu/python3.5m" "/${TARGET}/usr/include/x86_64-linux-gnu/python3.5m"
+# Link each Python's headers from /usr/local/include into the sysroot (links may dangle until Python is installed).
+PYTHON_VERSIONS=("python2.7" "python3.5m" "python3.6m" "python3.7m" "python3.8")
+for v in "${PYTHON_VERSIONS[@]}"; do
+  ln -s "/usr/local/include/${v}" "/${TARGET}/usr/include/x86_64-linux-gnu/${v}"
+done
diff --git a/tensorflow/tools/ci_build/install/build_and_install_python.sh b/tensorflow/tools/ci_build/install/build_and_install_python.sh
new file mode 100755
index 00000000000..fb8b6298542
--- /dev/null
+++ b/tensorflow/tools/ci_build/install/build_and_install_python.sh
@@ -0,0 +1,28 @@
+#!/bin/bash -eu
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+VERSION="$1"
+shift
+# Arguments after the version are forwarded to ./configure; build in a scratch dir, compiling in parallel.
+mkdir /build
+cd /build
+wget "https://www.python.org/ftp/python/${VERSION}/Python-${VERSION}.tgz"
+tar xvzf "Python-${VERSION}.tgz"
+cd "Python-${VERSION}"
+./configure --enable-optimizations "$@"
+make -j"$(nproc)"
+make altinstall
+rm -rf /build
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh b/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh
new file mode 100755
index 00000000000..d9953db3b5a
--- /dev/null
+++ b/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh
@@ -0,0 +1,70 @@
+#!/bin/bash -eu
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+PIP="$1"
+PIP_INSTALL=("${PIP}" "install" "--no-cache-dir" "--prefer-binary" --upgrade)
+# Bootstrap pip with get-pip.py when the requested pip binary is missing.
+if [[ ! -x "$(command -v "${PIP}")" ]]; then
+  # Python2 doesn't ship with pip by default.
+  PYTHON="${PIP/pip/python}"
+  # Old interpreters need the version-specific get-pip.py; fall back to the generic script otherwise.
+  wget "https://bootstrap.pypa.io/pip/${PIP##*pip}/get-pip.py" || wget "https://bootstrap.pypa.io/get-pip.py"
+  "${PYTHON}" "get-pip.py"
+  rm "get-pip.py"
+fi
+
+PACKAGES=(
+  "wheel"
+  "setuptools"
+  "virtualenv"
+  "six"
+  "future"
+  "absl-py"
+  "werkzeug"
+  "bleach"
+  "markdown"
+  "protobuf"
+  "numpy"
+  "scipy"
+  "scikit-learn"
+  "pandas"
+  "psutil"
+  "py-cpuinfo"
+  "lazy-object-proxy"
+  "pylint"
+  "pycodestyle"
+  "portpicker"
+  "grpcio"
+  "astor"
+  "gast"
+  "termcolor"
+  "keras_preprocessing"
+  "h5py"
+  "tf-estimator-nightly"
+  "tb-nightly"
+  "argparse"
+  "dm-tree"
+)
+
+# tf.mock requires the following for python2:
+if [[ "${PIP}" == *pip2* ]]; then
+  PACKAGES+=("mock")
+fi
+
+# Get the latest version of pip so it recognizes manylinux2010 wheels.
+"${PIP}" "install" "--no-cache-dir" "--upgrade" "pip"
+
+"${PIP_INSTALL[@]}" "${PACKAGES[@]}"