From 52ffc792fdef41de20ac60dc61d4e809d83b558d Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Tue, 15 Nov 2016 23:35:41 -0800
Subject: [PATCH] Migrate TensorFlow to libjpeg-turbo

This makes JPEG go 2x faster on x86_64 (k8), arm7, and arm8. On all other
CPU targets, e.g. x86, JPEG performance should be the same as it was before.

Fixes #4807
Change: 139295768
---
 .../core/platform/default/build_config/BUILD |   2 +-
 tensorflow/core/platform/jpeg.h              |   4 +-
 tensorflow/workspace.bzl                     |  18 +-
 third_party/jpeg.BUILD                       | 416 ++++++++++++++++++
 third_party/nasm.BUILD                       | 115 +++++
 5 files changed, 547 insertions(+), 8 deletions(-)
 create mode 100644 third_party/jpeg.BUILD
 create mode 100644 third_party/nasm.BUILD

diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index 2cb64c3922c..ca30603f513 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -99,7 +99,7 @@ cc_library(
     name = "jpeg",
     copts = tf_copts(),
     deps = [
-        "@jpeg_archive//:jpeg",
+        "@jpeg//:jpeg",  # external repository renamed from jpeg_archive to jpeg in workspace.bzl
     ],
 )
 
diff --git a/tensorflow/core/platform/jpeg.h b/tensorflow/core/platform/jpeg.h
index 5b083f84ab4..f5b4deed559 100644
--- a/tensorflow/core/platform/jpeg.h
+++ b/tensorflow/core/platform/jpeg.h
@@ -26,8 +26,8 @@ limitations under the License.
 #include
 #include
 extern "C" {
-#include
-#include
+#include "jerror.h"
+#include "jpeglib.h"
 }
 #else
 #error Define the appropriate PLATFORM_ macro for this platform
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index be9801828ef..617101a3060 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -63,11 +63,19 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
   )
 
   native.new_http_archive(
-    name = "jpeg_archive",
-    url = "http://www.ijg.org/files/jpegsrc.v9a.tar.gz",
-    sha256 = "3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7",
-    strip_prefix = "jpeg-9a",
-    build_file = str(Label("//:jpeg.BUILD")),
+    name = "nasm",  # assembler; jpeg.BUILD runs it as a genrule tool to build the x86_64 SIMD objects
+    url = "http://www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2",
+    sha256 = "00b0891c678c065446ca59bcee64719d0096d54d6886e6e472aeee2e170ae324",
+    strip_prefix = "nasm-2.12.02",
+    build_file = str(Label("//third_party:nasm.BUILD")),  # BUILD file added by this patch
+  )
+
+  native.new_http_archive(
+    name = "jpeg",  # replaces the IJG jpegsrc.v9a "jpeg_archive" repository removed above
+    url = "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz",
+    sha256 = "c15a9607892113946379ccea3ca8b85018301b200754f209453ab21674268e77",
+    strip_prefix = "libjpeg-turbo-1.5.1",
+    build_file = str(Label("//third_party:jpeg.BUILD")),  # BUILD file now lives under //third_party
   )
 
   native.new_http_archive(
diff --git a/third_party/jpeg.BUILD b/third_party/jpeg.BUILD
new file mode 100644
index 00000000000..cbc1e86e51b
--- /dev/null
+++ b/third_party/jpeg.BUILD
@@ -0,0 +1,416 @@
+# Description:
+#   libjpeg-turbo is a drop-in replacement for jpeglib, optimized with SIMD.
+
+licenses(["notice"])  # custom notice-style license, see LICENSE.md
+
+exports_files(["LICENSE.md"])
+
+libjpegturbo_nocopts = "-[W]error"  # pattern passed as nocopts to every cc_library below; matches the -Werror flag
+
+libjpegturbo_copts = select({  # first select: optimization and warning flags per toolchain
+    ":android": [
+        "-O2",
+        "-fPIE",
+        "-w",  # suppress all compiler warnings
+    ],
+    ":windows": [
+        "/Ox",
+        "/w14711",  # function 'function' selected for inline expansion
+        "/w14710",  # 'function' : function not inlined
+    ],
+    "//conditions:default": [
+        "-O3",
+        "-w",  # suppress all compiler warnings
+    ],
+}) + select({  # second select: extra NEON/ARMv7 flags, appended only for :armeabi-v7a
+    ":armeabi-v7a": [
+        "-D__ARM_NEON__",
+        "-march=armv7-a",
+        "-mfloat-abi=softfp",
+        "-fprefetch-loop-arrays",
+    ],
+    "//conditions:default": [],
+})
+
+cc_library(
+    name = "jpeg",  # the library itself; the only target in this file with public visibility
+    srcs = [
+        "jaricom.c",
+        "jcapimin.c",
+        "jcapistd.c",
+        "jcarith.c",
+        "jccoefct.c",
+        "jccolor.c",
+        "jcdctmgr.c",
+        "jchuff.c",
+        "jchuff.h",
+        "jcinit.c",
+        "jcmainct.c",
+        "jcmarker.c",
+        "jcmaster.c",
+        "jcomapi.c",
+        "jconfig.h",  # generated by :configure
+        "jconfigint.h",  # generated by :configure_internal
+        "jcparam.c",
+        "jcphuff.c",
+        "jcprepct.c",
+        "jcsample.c",
+        "jctrans.c",
+        "jdapimin.c",
+        "jdapistd.c",
+        "jdarith.c",
+        "jdatadst.c",
+        "jdatasrc.c",
+        "jdcoefct.c",
+        "jdcoefct.h",
+        "jdcolor.c",
+        "jdct.h",
+        "jddctmgr.c",
+        "jdhuff.c",
+        "jdhuff.h",
+        "jdinput.c",
+        "jdmainct.c",
+        "jdmainct.h",
+        "jdmarker.c",
+        "jdmaster.c",
+        "jdmaster.h",
+        "jdmerge.c",
+        "jdphuff.c",
+        "jdpostct.c",
+        "jdsample.c",
+        "jdsample.h",
+        "jdtrans.c",
+        "jerror.c",
+        "jfdctflt.c",
+        "jfdctfst.c",
+        "jfdctint.c",
+        "jidctflt.c",
+        "jidctfst.c",
+        "jidctint.c",
+        "jidctred.c",
+        "jinclude.h",
+        "jmemmgr.c",
+        "jmemnobs.c",
+        "jmemsys.h",
+        "jpeg_nbits_table.h",
+        "jpegcomp.h",
+        "jquant1.c",
+        "jquant2.c",
+        "jutils.c",
+        "jversion.h",
+    ],
+    hdrs = [
+        "jccolext.c",  # should have been named .inc
+        "jdcol565.c",  # should have been named .inc
+        "jdcolext.c",  # should have been named .inc
+        "jdmrg565.c",  # should have been named .inc
+        "jdmrgext.c",  # should have been named .inc
+        "jerror.h",
+        "jmorecfg.h",
+        "jpegint.h",
+        "jpeglib.h",
+        "jstdhuff.c",  # should have been named .inc
+    ],
+    copts = libjpegturbo_copts,
+    nocopts = libjpegturbo_nocopts,
+    visibility = ["//visibility:public"],
+    deps = select({  # exactly one SIMD backend is linked, chosen by target CPU
+        ":k8": [":simd_x86_64"],
+        ":armeabi-v7a": [":simd_armv7a"],
+        ":arm64-v8a": [":simd_armv8a"],
+        "//conditions:default": [":simd_none"],  # fallback backend (jsimd_none.c)
+    }),
+)
+
+cc_library(
+    name = "simd_x86_64",  # backend selected by :jpeg when :k8 matches
+    srcs = [
+        "jchuff.h",
+        "jconfig.h",
+        "jdct.h",
+        "jerror.h",
+        "jinclude.h",
+        "jmorecfg.h",
+        "jpegint.h",
+        "jpeglib.h",
+        "jsimd.h",
+        "jsimddct.h",
+        "simd/jccolor-sse2-64.o",  # this and the other .o entries are outputs of :simd_x86_64_assemblage23
+        "simd/jcgray-sse2-64.o",
+        "simd/jchuff-sse2-64.o",
+        "simd/jcsample-sse2-64.o",
+        "simd/jdcolor-sse2-64.o",
+        "simd/jdmerge-sse2-64.o",
+        "simd/jdsample-sse2-64.o",
+        "simd/jfdctflt-sse-64.o",
+        "simd/jfdctfst-sse2-64.o",
+        "simd/jfdctint-sse2-64.o",
+        "simd/jidctflt-sse2-64.o",
+        "simd/jidctfst-sse2-64.o",
+        "simd/jidctint-sse2-64.o",
+        "simd/jidctred-sse2-64.o",
+        "simd/jquantf-sse2-64.o",
+        "simd/jquanti-sse2-64.o",
+        "simd/jsimd.h",
+        "simd/jsimd_x86_64.c",
+    ],
+    copts = libjpegturbo_copts,
+    linkstatic = 1,
+    nocopts = libjpegturbo_nocopts,
+)
+
+genrule(
+    name = "simd_x86_64_assemblage23",  # assembles the x86_64 SIMD objects with @nasm
+    srcs = [
+        "simd/jccolext-sse2-64.asm",  # no matching entry in outs; NOTE(review): presumably %included by jccolor - confirm
+        "simd/jccolor-sse2-64.asm",
+        "simd/jcgray-sse2-64.asm",
+        "simd/jcgryext-sse2-64.asm",  # no matching entry in outs
+        "simd/jchuff-sse2-64.asm",
+        "simd/jcolsamp.inc",
+        "simd/jcsample-sse2-64.asm",
+        "simd/jdcolext-sse2-64.asm",  # no matching entry in outs
+        "simd/jdcolor-sse2-64.asm",
+        "simd/jdct.inc",
+        "simd/jdmerge-sse2-64.asm",
+        "simd/jdmrgext-sse2-64.asm",  # no matching entry in outs
+        "simd/jdsample-sse2-64.asm",
+        "simd/jfdctflt-sse-64.asm",
+        "simd/jfdctfst-sse2-64.asm",
+        "simd/jfdctint-sse2-64.asm",
+        "simd/jidctflt-sse2-64.asm",
+        "simd/jidctfst-sse2-64.asm",
+        "simd/jidctint-sse2-64.asm",
+        "simd/jidctred-sse2-64.asm",
+        "simd/jpeg_nbits_table.inc",
+        "simd/jquantf-sse2-64.asm",
+        "simd/jquanti-sse2-64.asm",
+        "simd/jsimdcfg.inc",  # generated by :configure_simd
+        "simd/jsimdext.inc",
+    ],
+    outs = [
+        "simd/jccolor-sse2-64.o",
+        "simd/jcgray-sse2-64.o",
+        "simd/jchuff-sse2-64.o",
+        "simd/jcsample-sse2-64.o",
+        "simd/jdcolor-sse2-64.o",
+        "simd/jdmerge-sse2-64.o",
+        "simd/jdsample-sse2-64.o",
+        "simd/jfdctflt-sse-64.o",
+        "simd/jfdctfst-sse2-64.o",
+        "simd/jfdctint-sse2-64.o",
+        "simd/jidctflt-sse2-64.o",
+        "simd/jidctfst-sse2-64.o",
+        "simd/jidctint-sse2-64.o",
+        "simd/jidctred-sse2-64.o",
+        "simd/jquantf-sse2-64.o",
+        "simd/jquanti-sse2-64.o",
+    ],
+    cmd = "for out in $(OUTS); do\n" +  # one nasm invocation per declared output
+          " $(location @nasm//:nasm) -f elf64" +  # object format is hard-coded to 64-bit ELF
+          " -DELF -DPIC -DRGBX_FILLER_0XFF -D__x86_64__ -DARCH_X86_64" +
+          " -I $$(dirname $(location simd/jdct.inc))/" +  # $$ is Bazel's escape for a literal shell $
+          " -I $$(dirname $(location simd/jsimdcfg.inc))/" +  # second include dir: where the generated jsimdcfg.inc lands
+          " -o $$out" +
+          " $$(dirname $(location simd/jdct.inc))/$$(basename $${out%.o}.asm)\n" +  # input: same basename as the output, .asm, in the simd source dir
+          "done",
+    tools = ["@nasm//:nasm"],
+)
+
+cc_library(
+    name = "simd_armv7a",  # backend selected by :jpeg when :armeabi-v7a matches
+    srcs = [
+        "jchuff.h",
+        "jconfig.h",
+        "jdct.h",
+        "jinclude.h",
+        "jmorecfg.h",
+        "jpeglib.h",
+        "jsimd.h",
+        "jsimddct.h",
+        "simd/jsimd.h",
+        "simd/jsimd_arm.c",
+        "simd/jsimd_arm_neon.S",
+    ],
+    copts = libjpegturbo_copts,
+    nocopts = libjpegturbo_nocopts,
+)
+
+cc_library(
+    name = "simd_armv8a",  # backend selected by :jpeg when :arm64-v8a matches
+    srcs = [
+        "jchuff.h",
+        "jconfig.h",
+        "jdct.h",
+        "jinclude.h",
+        "jmorecfg.h",
+        "jpeglib.h",
+        "jsimd.h",
+        "jsimddct.h",
+        "simd/jsimd.h",
+        "simd/jsimd_arm64.c",
+        "simd/jsimd_arm64_neon.S",
+    ],
+    copts = libjpegturbo_copts,
+    nocopts = libjpegturbo_nocopts,
+)
+
+cc_library(
+    name = "simd_none",  # backend selected by :jpeg for every other CPU
+    srcs = [
+        "jchuff.h",
+        "jconfig.h",
+        "jdct.h",
+        "jerror.h",
+        "jinclude.h",
+        "jmorecfg.h",
+        "jpegint.h",
+        "jpeglib.h",
+        "jsimd.h",
+        "jsimd_none.c",
+        "jsimddct.h",
+    ],
+    copts = libjpegturbo_copts,
+    nocopts = libjpegturbo_nocopts,
+)
+
+genrule(
+    name = "configure",  # writes jconfig.h from the fixed text below
+    outs = ["jconfig.h"],
+    cmd = "cat <<'EOF' >$@\n" +  # heredoc with quoted delimiter: the shell copies the text without expansion
+          "#define JPEG_LIB_VERSION 62\n" +
+          "#define LIBJPEG_TURBO_VERSION 1.5.1\n" +
+          "#define LIBJPEG_TURBO_VERSION_NUMBER 1005001\n" +
+          "#define C_ARITH_CODING_SUPPORTED 1\n" +
+          "#define D_ARITH_CODING_SUPPORTED 1\n" +
+          "#define BITS_IN_JSAMPLE 8\n" +
+          "#define HAVE_LOCALE_H 1\n" +
+          "#define HAVE_STDDEF_H 1\n" +
+          "#define HAVE_STDLIB_H 1\n" +
+          "#define HAVE_UNSIGNED_CHAR 1\n" +
+          "#define HAVE_UNSIGNED_SHORT 1\n" +
+          "#define MEM_SRCDST_SUPPORTED 1\n" +
+          "#define NEED_SYS_TYPES_H 1\n" +
+          select({  # WITH_SIMD is emitted only for the CPUs that get a SIMD backend in :jpeg deps
+              ":k8": "#define WITH_SIMD 1\n",
+              ":armeabi-v7a": "#define WITH_SIMD 1\n",
+              ":arm64-v8a": "#define WITH_SIMD 1\n",
+              "//conditions:default": "",
+          }) +
+          "EOF",
+)
+
+genrule(
+    name = "configure_internal",  # writes jconfigint.h from the fixed text below
+    outs = ["jconfigint.h"],
+    cmd = "cat <<'EOF' >$@\n" +  # heredoc with quoted delimiter: the shell copies the text without expansion
+          "#define BUILD \"20161115\"\n" +
+          "#ifdef _MSC_VER /* Windows */\n" +
+          "#define INLINE __inline\n" +
+          "#else\n" +
+          "#define INLINE inline __attribute__((always_inline))\n" +
+          "#endif\n" +
+          "#define PACKAGE_NAME \"libjpeg-turbo\"\n" +
+          "#define VERSION \"1.5.1\"\n" +
+          "#if (__WORDSIZE==64 && !defined(__native_client__)) || defined(_WIN64)\n" +  # NOTE(review): relies on __WORDSIZE being defined by the toolchain - confirm for non-glibc targets
+          "#define SIZEOF_SIZE_T 8\n" +
+          "#else\n" +
+          "#define SIZEOF_SIZE_T 4\n" +
+          "#endif\n" +
+          "EOF",
+)
+
+# Writes simd/jsimdcfg.inc verbatim; NOTE(review): presumably hand-expanded because the usual way of generating it is impractical here - confirm.
+genrule(
+    name = "configure_simd",  # output is consumed by :simd_x86_64_assemblage23
+    outs = ["simd/jsimdcfg.inc"],
+    cmd = "cat <<'EOF' >$@\n" +  # heredoc with quoted delimiter: the shell copies the text without expansion
+          "%define DCTSIZE 8\n" +
+          "%define DCTSIZE2 64\n" +
+          "%define RGB_RED 0\n" +
+          "%define RGB_GREEN 1\n" +
+          "%define RGB_BLUE 2\n" +
+          "%define RGB_PIXELSIZE 3\n" +
+          "%define EXT_RGB_RED 0\n" +
+          "%define EXT_RGB_GREEN 1\n" +
+          "%define EXT_RGB_BLUE 2\n" +
+          "%define EXT_RGB_PIXELSIZE 3\n" +
+          "%define EXT_RGBX_RED 0\n" +
+          "%define EXT_RGBX_GREEN 1\n" +
+          "%define EXT_RGBX_BLUE 2\n" +
+          "%define EXT_RGBX_PIXELSIZE 4\n" +
+          "%define EXT_BGR_RED 2\n" +
+          "%define EXT_BGR_GREEN 1\n" +
+          "%define EXT_BGR_BLUE 0\n" +
+          "%define EXT_BGR_PIXELSIZE 3\n" +
+          "%define EXT_BGRX_RED 2\n" +
+          "%define EXT_BGRX_GREEN 1\n" +
+          "%define EXT_BGRX_BLUE 0\n" +
+          "%define EXT_BGRX_PIXELSIZE 4\n" +
+          "%define EXT_XBGR_RED 3\n" +
+          "%define EXT_XBGR_GREEN 2\n" +
+          "%define EXT_XBGR_BLUE 1\n" +
+          "%define EXT_XBGR_PIXELSIZE 4\n" +
+          "%define EXT_XRGB_RED 1\n" +
+          "%define EXT_XRGB_GREEN 2\n" +
+          "%define EXT_XRGB_BLUE 3\n" +
+          "%define EXT_XRGB_PIXELSIZE 4\n" +
+          "%define RGBX_FILLER_0XFF 1\n" +
+          "%define JSAMPLE byte ; unsigned char\n" +
+          "%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE)\n" +
+          "%define CENTERJSAMPLE 128\n" +
+          "%define JCOEF word ; short\n" +
+          "%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF)\n" +
+          "%define JDIMENSION dword ; unsigned int\n" +
+          "%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION)\n" +
+          "%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h)\n" +
+          "%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h)\n" +
+          "%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h)\n" +
+          "%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h)\n" +
+          "%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW)\n" +
+          "%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY)\n" +
+          "%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE)\n" +
+          "%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR)\n" +
+          "%define DCTELEM word ; short\n" +
+          "%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM)\n" +
+          "%define float FP32 ; float\n" +  # NOTE(review): defines `float`, while the next line is SIZEOF_FAST_FLOAT - confirm the name against upstream
+          "%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(float)\n" +
+          "%define ISLOW_MULT_TYPE word ; must be short\n" +
+          "%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE)\n" +
+          "%define IFAST_MULT_TYPE word ; must be short\n" +
+          "%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE)\n" +
+          "%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors\n" +
+          "%define FLOAT_MULT_TYPE FP32 ; must be float\n" +
+          "%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE)\n" +
+          "%define JSIMD_NONE 0x00\n" +
+          "%define JSIMD_MMX 0x01\n" +
+          "%define JSIMD_3DNOW 0x02\n" +
+          "%define JSIMD_SSE 0x04\n" +
+          "%define JSIMD_SSE2 0x08\n" +
+          "EOF",
+)
+
+config_setting(
+    name = "k8",
+    values = {"cpu": "k8"},  # selects :simd_x86_64 and WITH_SIMD
+)
+
+config_setting(
+    name = "android",
+    values = {"crosstool_top": "//external:android/crosstool"},  # selects the -O2 -fPIE -w flag set
+)
+
+config_setting(
+    name = "armeabi-v7a",
+    values = {"android_cpu": "armeabi-v7a"},  # selects :simd_armv7a, the NEON copts and WITH_SIMD
+)
+
+config_setting(
+    name = "arm64-v8a",
+    values = {"android_cpu": "arm64-v8a"},  # selects :simd_armv8a and WITH_SIMD
+)
+
+config_setting(
+    name = "windows",
+    values = {"cpu": "x64_windows_msvc"},  # selects the MSVC-style /Ox flag set in libjpegturbo_copts
+)
diff --git a/third_party/nasm.BUILD b/third_party/nasm.BUILD
new file mode 100644
index 00000000000..0450b353944
--- /dev/null
+++ b/third_party/nasm.BUILD
@@ -0,0 +1,115 @@
+# Description:
+#   NASM is a portable assembler in the Intel/Microsoft tradition.
+
+licenses(["notice"])  # BSD 2-clause
+
+exports_files(["LICENSE"])
+
+cc_binary(
+    name = "nasm",  # the assembler executable; @jpeg's genrule runs it via $(location @nasm//:nasm)
+    srcs = [
+        "assemble.c",
+        "assemble.h",
+        "compiler.h",
+        "crc64.c",
+        "directiv.c",
+        "directiv.h",
+        "disp8.c",
+        "disp8.h",
+        "eval.c",
+        "eval.h",
+        "exprlib.c",
+        "float.c",
+        "float.h",
+        "hashtbl.c",
+        "hashtbl.h",
+        "iflag.c",
+        "iflag.h",
+        "iflaggen.h",
+        "ilog2.c",
+        "insns.h",
+        "insnsa.c",
+        "insnsb.c",
+        "insnsi.h",
+        "labels.c",
+        "labels.h",
+        "lib/strlcpy.c",
+        "listing.c",
+        "listing.h",
+        "macros.c",
+        "md5.h",
+        "md5c.c",
+        "nasm.c",
+        "nasm.h",
+        "nasmlib.c",
+        "nasmlib.h",
+        "opflags.h",
+        "output/codeview.c",
+        "output/dwarf.h",
+        "output/elf.h",
+        "output/nulldbg.c",
+        "output/nullout.c",
+        "output/outaout.c",
+        "output/outas86.c",
+        "output/outbin.c",
+        "output/outcoff.c",
+        "output/outdbg.c",
+        "output/outelf.c",
+        "output/outelf.h",
+        "output/outelf32.c",
+        "output/outelf64.c",
+        "output/outelfx32.c",
+        "output/outform.c",
+        "output/outform.h",
+        "output/outieee.c",
+        "output/outlib.c",
+        "output/outlib.h",
+        "output/outmacho.c",
+        "output/outobj.c",
+        "output/outrdf2.c",
+        "output/pecoff.h",
+        "output/stabs.h",
+        "parser.c",
+        "parser.h",
+        "pptok.c",
+        "pptok.h",
+        "preproc.c",
+        "preproc.h",
+        "preproc-nop.c",
+        "quote.c",
+        "quote.h",
+        "raa.c",
+        "raa.h",
+        "rbtree.c",
+        "rbtree.h",
+        "rdoff/rdoff.h",
+        "realpath.c",
+        "regflags.c",
+        "regs.h",
+        "regvals.c",
+        "saa.c",
+        "saa.h",
+        "srcfile.c",
+        "stdscan.c",
+        "stdscan.h",
+        "strfunc.c",
+        "tables.h",
+        "tokens.h",
+        "tokhash.c",
+        "ver.c",
+        "version.h",
+    ],
+    copts = select({
+        ":windows": [],  # no extra flags when building for x64_windows_msvc
+        "//conditions:default": [
+            "-w",  # suppress all compiler warnings
+            "-std=c99",
+        ],
+    }),
+    visibility = ["@jpeg//:__pkg__"],  # only the root package of @jpeg may use this binary
+)
+
+config_setting(
+    name = "windows",
+    values = {"cpu": "x64_windows_msvc"},  # same condition as the :windows setting in jpeg.BUILD
+)